# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from __future__ import absolute_import

from ...lazy_import import lazy_import
lazy_import(globals(), """
import re

from fnmatch import fnmatch

from breezy._termcolor import color_string, FG

from breezy import (
    controldir,
    diff,
    errors,
    lazy_regex,
    osutils,
    trace,
    revision as _mod_revision,
    )
from breezy.revisionspec import (
    RevisionSpec,
    RevisionSpec_revid,
    RevisionSpec_revno,
    )
from breezy.sixish import (
    BytesIO,
    )
""")
_user_encoding = osutils.get_user_encoding()
51
class _RevisionNotLinear(Exception):
52
"""Raised when a revision is not on left-hand history."""
55
def _rev_on_mainline(rev_tuple):
56
"""returns True is rev tuple is on mainline"""
57
if len(rev_tuple) == 1:
59
return rev_tuple[1] == 0 and rev_tuple[2] == 0
62
# NOTE: _linear_view_revisions is based on
# breezy.log._linear_view_revisions.
# This should probably be a common public API
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield ``(revision_id, dotted_revno_str, merge_depth)`` tuples.

    Walks the left-hand (mainline) ancestry backwards from end_rev_id
    until start_rev_id is reached; merge_depth is always 0 on this walk.
    """
    # requires that start is older than end
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        if revision_id == start_rev_id:
            # reached the lower bound of the range: emit it and stop
            yield revision_id, revno_str, 0
            break
        yield revision_id, revno_str, 0
79
# NOTE: _graph_view_revisions is copied from
80
# breezy.log._graph_view_revisions.
81
# This should probably be a common public API
82
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
83
rebase_initial_depths=True):
84
"""Calculate revisions to view including merges, newest to oldest.
86
:param branch: the branch
87
:param start_rev_id: the lower revision-id
88
:param end_rev_id: the upper revision-id
89
:param rebase_initial_depth: should depths be rebased until a mainline
91
:return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
93
# requires that start is older than end
94
view_revisions = branch.iter_merge_sorted_revisions(
95
start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
96
stop_rule="with-merges")
97
if not rebase_initial_depths:
98
for (rev_id, merge_depth, revno, end_of_merge
100
yield rev_id, '.'.join(map(str, revno)), merge_depth
102
# We're following a development line starting at a merged revision.
103
# We need to adjust depths down by the initial depth until we find
104
# a depth less than it. Then we use that depth as the adjustment.
105
# If and when we reach the mainline, depth adjustment ends.
106
depth_adjustment = None
107
for (rev_id, merge_depth, revno, end_of_merge
109
if depth_adjustment is None:
110
depth_adjustment = merge_depth
112
if merge_depth < depth_adjustment:
113
# From now on we reduce the depth adjustement, this can be
114
# surprising for users. The alternative requires two passes
115
# which breaks the fast display of the first revision
117
depth_adjustment = merge_depth
118
merge_depth -= depth_adjustment
119
yield rev_id, '.'.join(map(str, revno)), merge_depth
122
def compile_pattern(pattern, flags=0):
    """Compile a user-supplied regular expression.

    :param pattern: regular expression string
    :param flags: optional ``re`` flags (e.g. ``re.IGNORECASE``)
    :raises errors.BzrError: if the pattern does not compile
    :return: the compiled pattern object
    """
    try:
        # use python's re.compile as we need to catch re.error in case of bad pattern
        lazy_regex.reset_compile()
        patternc = re.compile(pattern, flags)
    except re.error:
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
    return patternc
133
def is_fixed_string(s):
    """Return True if s contains no regex metacharacters.

    Only word characters ([A-Za-z0-9_]) and whitespace count as "fixed",
    allowing a fast substring search instead of a regex search.
    """
    return bool(re.match("^([A-Za-z0-9_]|\\s)*$", s))
139
class _GrepDiffOutputter(object):
140
"""Precalculate formatting based on options given for diff grep.
143
def __init__(self, opts):
145
self.outf = opts.outf
147
if opts.fixed_string:
148
self._old = opts.pattern
149
self._new = color_string(opts.pattern, FG.BOLD_RED)
150
self.get_writer = self._get_writer_fixed_highlighted
152
flags = opts.patternc.flags
153
self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
154
self._highlight = color_string("\\1", FG.BOLD_RED)
155
self.get_writer = self._get_writer_regexp_highlighted
157
self.get_writer = self._get_writer_plain
159
def get_file_header_writer(self):
160
"""Get function for writing file headers"""
161
write = self.outf.write
162
eol_marker = self.opts.eol_marker
163
def _line_writer(line):
164
write(line + eol_marker)
165
def _line_writer_color(line):
166
write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
167
if self.opts.show_color:
168
return _line_writer_color
173
def get_revision_header_writer(self):
174
"""Get function for writing revno lines"""
175
write = self.outf.write
176
eol_marker = self.opts.eol_marker
177
def _line_writer(line):
178
write(line + eol_marker)
179
def _line_writer_color(line):
180
write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
181
if self.opts.show_color:
182
return _line_writer_color
187
def _get_writer_plain(self):
188
"""Get function for writing uncoloured output"""
189
write = self.outf.write
190
eol_marker = self.opts.eol_marker
191
def _line_writer(line):
192
write(line + eol_marker)
195
def _get_writer_regexp_highlighted(self):
196
"""Get function for writing output with regexp match highlighted"""
197
_line_writer = self._get_writer_plain()
198
sub, highlight = self._sub, self._highlight
199
def _line_writer_regexp_highlighted(line):
200
"""Write formatted line with matched pattern highlighted"""
201
return _line_writer(line=sub(highlight, line))
202
return _line_writer_regexp_highlighted
204
def _get_writer_fixed_highlighted(self):
205
"""Get function for writing output with search string highlighted"""
206
_line_writer = self._get_writer_plain()
207
old, new = self._old, self._new
208
def _line_writer_fixed_highlighted(line):
209
"""Write formatted line with string searched for highlighted"""
210
return _line_writer(line=line.replace(old, new))
211
return _line_writer_fixed_highlighted
215
def grep_diff(opts):
    """Grep the changesets (diffs) of a branch's history for opts.pattern.

    Opens the branch in the current directory and, for every revision in
    the selected range, greps the textual diff of that revision against
    its first parent, printing matching added/removed lines grouped by
    revision and file.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if opts.revision:
            start_rev = opts.revision[0]
        else:
            # if no revision is specified for diff grep we grep all changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
            start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            # empty branch: nothing to grep
            return
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                             _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if len(new_rev.parent_ids) == 0:
                ancestor_id = _mod_revision.NULL_REVISION
            else:
                ancestor_id = new_rev.parent_ids[0]
            old_tree = repo.revision_tree(ancestor_id)
            s = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, s,
                                 old_label='', new_label='')
            text = s.getvalue()
            # Emit the revision/file headers lazily: only once a line in
            # this revision's diff actually matches the pattern.
            display_revno = True
            display_file = False
            file_header = None
            for line in text.splitlines():
                if file_pattern.search(line):
                    file_header = line
                    display_file = False
                elif diff_pattern.search(line):
                    if display_revno:
                        writerevno("=== revno:%s ===" % (revno,))
                        display_revno = False
                    if not display_file:
                        writefileheader(" %s" % (
                            file_header.decode(file_encoding, 'replace'),))
                        display_file = True
                    line = line.decode(file_encoding, 'replace')
                    writeline(" %s" % (line,))
303
def versioned_grep(opts):
    """Grep versioned files in a revision (or revision range) of the branch.

    Opens the branch in the current directory, resolves the revision range
    from opts.revision, and greps every path in opts.path_list for each
    selected revision.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            # default to the first revision when none was resolved
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                             _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
369
def workingtree_grep(opts):
    """Grep the files of the current working tree for opts.pattern."""
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
392
def _skip_file(include, exclude, path):
    """Return True if path should be skipped given include/exclude globs.

    A path is skipped when an include list is given and it matches none of
    its globs, or when it matches any glob of the exclude list.
    """
    if include and not _path_in_glob_list(path, include):
        return True
    if exclude and _path_in_glob_list(path, exclude):
        return True
    return False
400
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep all regular versioned files under path in tree.

    :param tree: working tree or revision tree to search
    :param path: directory to search, relative to the branch root
    :param relpath: the tree's location relative to the user's cwd
    :param opts: grep options object
    :param revno: dotted revno string, or None when grepping a working tree
    :param path_prefix: prefix to show in front of results, or None
    """
    # setup relpath to open files relative to cwd
    # NOTE(review): rpath appears unused below — kept for parity; confirm
    # before removing.
    rpath = relpath
    if relpath:
        rpath = osutils.pathjoin('..', relpath)

    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching recursively from root
        from_dir = None

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, entry in tree.list_files(
            include_root=False, from_dir=from_dir,
            recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(tree_path)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((tree_path, (fp, tree_path)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read the
                    # entire file
                    with open(path_for_file, 'rb', buffering=4096) as file:
                        _file_grep_list_only_wtree(file, fp, opts, path_prefix)
                else:
                    with open(path_for_file, 'rb') as f:
                        _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            _file_grep(b''.join(chunks), path, opts, revno, path_prefix,
                       tree.get_file_revision(tree_path))
460
def _make_display_path(relpath, path):
461
"""Return path string relative to user cwd.
463
Take tree's 'relpath' and user supplied 'path', and return path
464
that can be displayed to the user.
467
# update path so to display it w.r.t cwd
468
# handle windows slash separator
469
path = osutils.normpath(osutils.pathjoin(relpath, path))
470
path = path.replace('\\', '/')
471
path = path.replace(relpath + '/', '', 1)
475
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno,
                        path_prefix=None):
    """Create a file object for the specified id and pass it on to _file_grep.
    """
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(tree_path)
    _file_grep(file_text, path, opts, revno, path_prefix)
484
def _path_in_glob_list(path, glob_list):
485
for glob in glob_list:
486
if fnmatch(path, glob):
491
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
    """Grep a working-tree file for list-only output (-l / -L).

    Only determines whether the file matches at all; stops at the first
    match rather than formatting every matching line.
    """
    # test and skip binary files
    if b'\x00' in file.read(1024):
        if opts.verbose:
            trace.warning("Binary file '%s' skipped.", path)
        return

    file.seek(0)  # search from beginning

    found = False
    if opts.fixed_string:
        pattern = opts.pattern.encode(_user_encoding, 'replace')
        for line in file:
            if pattern in line:
                found = True
                break
    else:  # not fixed_string
        for line in file:
            if opts.patternc.search(line):
                found = True
                break

    if (opts.files_with_matches and found) or \
            (opts.files_without_match and not found):
        if path_prefix and path_prefix != '.':
            # user has passed a dir arg, show that as result prefix
            path = osutils.pathjoin(path_prefix, path)
        opts.outputter.get_writer(path, None, None)()
521
class _Outputter(object):
522
"""Precalculate formatting based on options given
524
The idea here is to do this work only once per run, and finally return a
525
function that will do the minimum amount possible for each match.
527
def __init__(self, opts, use_cache=False):
528
self.outf = opts.outf
530
# self.cache is used to cache results for dir grep based on fid.
531
# If the fid is does not change between results, it means that
532
# the result will be the same apart from revno. In such a case
533
# we avoid getting file chunks from repo and grepping. The result
534
# is just printed by replacing old revno with new one.
538
no_line = opts.files_with_matches or opts.files_without_match
542
self.get_writer = self._get_writer_plain
543
elif opts.fixed_string:
544
self._old = opts.pattern
545
self._new = color_string(opts.pattern, FG.BOLD_RED)
546
self.get_writer = self._get_writer_fixed_highlighted
548
flags = opts.patternc.flags
549
self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
550
self._highlight = color_string("\\1", FG.BOLD_RED)
551
self.get_writer = self._get_writer_regexp_highlighted
552
path_start = FG.MAGENTA
554
sep = color_string(':', FG.BOLD_CYAN)
555
rev_sep = color_string('~', FG.BOLD_YELLOW)
557
self.get_writer = self._get_writer_plain
558
path_start = path_end = ""
562
parts = [path_start, "%(path)s"]
564
parts.extend([rev_sep, "%(revno)s"])
565
self._format_initial = "".join(parts)
568
if not opts.print_revno:
569
parts.append(path_end)
572
parts.extend([sep, "%(lineno)s"])
573
parts.extend([sep, "%(line)s"])
574
parts.append(opts.eol_marker)
575
self._format_perline = "".join(parts)
577
def _get_writer_plain(self, path, revno, cache_id):
578
"""Get function for writing uncoloured output"""
579
per_line = self._format_perline
580
start = self._format_initial % {"path":path, "revno":revno}
581
write = self.outf.write
582
if self.cache is not None and cache_id is not None:
584
self.cache[cache_id] = path, result_list
585
add_to_cache = result_list.append
586
def _line_cache_and_writer(**kwargs):
587
"""Write formatted line and cache arguments"""
588
end = per_line % kwargs
591
return _line_cache_and_writer
592
def _line_writer(**kwargs):
593
"""Write formatted line from arguments given by underlying opts"""
594
write(start + per_line % kwargs)
597
def write_cached_lines(self, cache_id, revno):
598
"""Write cached results out again for new revision"""
599
cached_path, cached_matches = self.cache[cache_id]
600
start = self._format_initial % {"path":cached_path, "revno":revno}
601
write = self.outf.write
602
for end in cached_matches:
605
def _get_writer_regexp_highlighted(self, path, revno, cache_id):
606
"""Get function for writing output with regexp match highlighted"""
607
_line_writer = self._get_writer_plain(path, revno, cache_id)
608
sub, highlight = self._sub, self._highlight
609
def _line_writer_regexp_highlighted(line, **kwargs):
610
"""Write formatted line with matched pattern highlighted"""
611
return _line_writer(line=sub(highlight, line), **kwargs)
612
return _line_writer_regexp_highlighted
614
def _get_writer_fixed_highlighted(self, path, revno, cache_id):
615
"""Get function for writing output with search string highlighted"""
616
_line_writer = self._get_writer_plain(path, revno, cache_id)
617
old, new = self._old, self._new
618
def _line_writer_fixed_highlighted(line, **kwargs):
619
"""Write formatted line with string searched for highlighted"""
620
return _line_writer(line=line.replace(old, new), **kwargs)
621
return _line_writer_fixed_highlighted
624
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
625
# test and skip binary files
626
if b'\x00' in file_text[:1024]:
628
trace.warning("Binary file '%s' skipped.", path)
631
if path_prefix and path_prefix != '.':
632
# user has passed a dir arg, show that as result prefix
633
path = osutils.pathjoin(path_prefix, path)
635
# GZ 2010-06-07: There's no actual guarentee the file contents will be in
636
# the user encoding, but we have to guess something and it
637
# is a reasonable default without a better mechanism.
638
file_encoding = _user_encoding
639
pattern = opts.pattern.encode(_user_encoding, 'replace')
641
writeline = opts.outputter.get_writer(path, revno, cache_id)
643
if opts.files_with_matches or opts.files_without_match:
644
if opts.fixed_string:
645
found = pattern in file_text
647
search = opts.patternc.search
648
if b"$" not in pattern:
649
found = search(file_text) is not None
651
for line in file_text.splitlines():
657
if (opts.files_with_matches and found) or \
658
(opts.files_without_match and not found):
660
elif opts.fixed_string:
661
# Fast path for no match, search through the entire file at once rather
662
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
663
i = file_text.find(pattern)
666
b = file_text.rfind(b"\n", 0, i) + 1
668
start = file_text.count(b"\n", 0, b) + 1
669
file_text = file_text[b:]
671
for index, line in enumerate(file_text.splitlines()):
673
line = line.decode(file_encoding, 'replace')
674
writeline(lineno=index+start, line=line)
676
for line in file_text.splitlines():
678
line = line.decode(file_encoding, 'replace')
681
# Fast path on no match, the re module avoids bad behaviour in most
682
# standard cases, but perhaps could try and detect backtracking
683
# patterns here and avoid whole text search in those cases
684
search = opts.patternc.search
685
if b"$" not in pattern:
686
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
687
# through revisions as bazaar returns binary mode
688
# and trailing \r breaks $ as line ending match
689
m = search(file_text)
692
b = file_text.rfind(b"\n", 0, m.start()) + 1
694
start = file_text.count(b"\n", 0, b) + 1
695
file_text = file_text[b:]
699
for index, line in enumerate(file_text.splitlines()):
701
line = line.decode(file_encoding, 'replace')
702
writeline(lineno=index+start, line=line)
704
for line in file_text.splitlines():
706
line = line.decode(file_encoding, 'replace')