# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from __future__ import absolute_import

import re

from ...lazy_import import lazy_import
lazy_import(globals(), """
from fnmatch import fnmatch

from breezy._termcolor import color_string, FG

from breezy import (
    controldir,
    diff,
    errors,
    lazy_regex,
    osutils,
    revision as _mod_revision,
    trace,
    )
from breezy.revisionspec import (
    RevisionSpec,
    RevisionSpec_revid,
    RevisionSpec_revno,
    )
from breezy.sixish import (
    BytesIO,
    )
""")

_user_encoding = osutils.get_user_encoding()
51
class _RevisionNotLinear(Exception):
52
"""Raised when a revision is not on left-hand history."""
55
def _rev_on_mainline(rev_tuple):
56
"""returns True is rev tuple is on mainline"""
57
if len(rev_tuple) == 1:
59
return rev_tuple[1] == 0 and rev_tuple[2] == 0
62
# NOTE: _linear_view_revisions is based on breezy.log._linear_view_revisions.
# This should probably be a common public API
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield (revision_id, dotted_revno_str, merge_depth) newest to oldest.

    Walks only the left-hand (mainline) ancestry from end_rev_id back to
    start_rev_id, so it is faster than a full merge-sorted traversal.
    Requires that start is older than end.
    """
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        if revision_id == start_rev_id:
            # reached the lower bound: emit it and stop walking
            yield revision_id, revno_str, 0
            break
        else:
            yield revision_id, revno_str, 0
79
# NOTE: _graph_view_revisions is copied from
80
# breezy.log._graph_view_revisions.
81
# This should probably be a common public API
82
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
83
rebase_initial_depths=True):
84
"""Calculate revisions to view including merges, newest to oldest.
86
:param branch: the branch
87
:param start_rev_id: the lower revision-id
88
:param end_rev_id: the upper revision-id
89
:param rebase_initial_depth: should depths be rebased until a mainline
91
:return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
93
# requires that start is older than end
94
view_revisions = branch.iter_merge_sorted_revisions(
95
start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
96
stop_rule="with-merges")
97
if not rebase_initial_depths:
98
for (rev_id, merge_depth, revno, end_of_merge
100
yield rev_id, '.'.join(map(str, revno)), merge_depth
102
# We're following a development line starting at a merged revision.
103
# We need to adjust depths down by the initial depth until we find
104
# a depth less than it. Then we use that depth as the adjustment.
105
# If and when we reach the mainline, depth adjustment ends.
106
depth_adjustment = None
107
for (rev_id, merge_depth, revno, end_of_merge
109
if depth_adjustment is None:
110
depth_adjustment = merge_depth
112
if merge_depth < depth_adjustment:
113
# From now on we reduce the depth adjustement, this can be
114
# surprising for users. The alternative requires two passes
115
# which breaks the fast display of the first revision
117
depth_adjustment = merge_depth
118
merge_depth -= depth_adjustment
119
yield rev_id, '.'.join(map(str, revno)), merge_depth
122
def compile_pattern(pattern, flags=0):
    """Compile a user-supplied regular expression.

    :param pattern: the regular expression string to compile.
    :param flags: optional ``re`` flags (e.g. ``re.IGNORECASE``).
    :return: the compiled pattern object.
    :raises errors.BzrError: if the pattern is not a valid regex.
    """
    try:
        # use python's re.compile as we need to catch re.error in case of bad pattern
        lazy_regex.reset_compile()
        patternc = re.compile(pattern, flags)
    except re.error:
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
    return patternc
133
def is_fixed_string(s):
    """Return True if s contains only word characters and whitespace.

    Such a pattern has no regex metacharacters, so a fast plain substring
    search can be used instead of the re engine.
    """
    return bool(re.match("^([A-Za-z0-9_]|\\s)*$", s))
139
class _GrepDiffOutputter(object):
140
"""Precalculate formatting based on options given for diff grep.
143
def __init__(self, opts):
145
self.outf = opts.outf
147
if opts.fixed_string:
148
self._old = opts.pattern
149
self._new = color_string(opts.pattern, FG.BOLD_RED)
150
self.get_writer = self._get_writer_fixed_highlighted
152
flags = opts.patternc.flags
153
self._sub = re.compile(
154
opts.pattern.join(("((?:", ")+)")), flags).sub
155
self._highlight = color_string("\\1", FG.BOLD_RED)
156
self.get_writer = self._get_writer_regexp_highlighted
158
self.get_writer = self._get_writer_plain
160
def get_file_header_writer(self):
161
"""Get function for writing file headers"""
162
write = self.outf.write
163
eol_marker = self.opts.eol_marker
165
def _line_writer(line):
166
write(line + eol_marker)
168
def _line_writer_color(line):
169
write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
170
if self.opts.show_color:
171
return _line_writer_color
176
def get_revision_header_writer(self):
177
"""Get function for writing revno lines"""
178
write = self.outf.write
179
eol_marker = self.opts.eol_marker
181
def _line_writer(line):
182
write(line + eol_marker)
184
def _line_writer_color(line):
185
write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
186
if self.opts.show_color:
187
return _line_writer_color
192
def _get_writer_plain(self):
193
"""Get function for writing uncoloured output"""
194
write = self.outf.write
195
eol_marker = self.opts.eol_marker
197
def _line_writer(line):
198
write(line + eol_marker)
201
def _get_writer_regexp_highlighted(self):
202
"""Get function for writing output with regexp match highlighted"""
203
_line_writer = self._get_writer_plain()
204
sub, highlight = self._sub, self._highlight
206
def _line_writer_regexp_highlighted(line):
207
"""Write formatted line with matched pattern highlighted"""
208
return _line_writer(line=sub(highlight, line))
209
return _line_writer_regexp_highlighted
211
def _get_writer_fixed_highlighted(self):
212
"""Get function for writing output with search string highlighted"""
213
_line_writer = self._get_writer_plain()
214
old, new = self._old, self._new
216
def _line_writer_fixed_highlighted(line):
217
"""Write formatted line with string searched for highlighted"""
218
return _line_writer(line=line.replace(old, new))
219
return _line_writer_fixed_highlighted
223
def grep_diff(opts):
    """Grep through the diffs of a revision range for opts.pattern.

    Lines added or removed by each revision are matched; matches are
    printed grouped under revno and file headers.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if opts.revision:
            start_rev = opts.revision[0]
        else:
            # if no revision is specified for diff grep we grep all changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
            start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            return
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revison
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        # NOTE(review): diffs are decoded with the user encoding as a guess;
        # there is no guarantee file contents use that encoding.
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if len(new_rev.parent_ids) == 0:
                ancestor_id = _mod_revision.NULL_REVISION
            else:
                ancestor_id = new_rev.parent_ids[0]
            old_tree = repo.revision_tree(ancestor_id)
            s = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, s,
                                 old_label='', new_label='')
            display_revno = True
            display_file = False
            file_header = None
            text = s.getvalue()
            for line in text.splitlines():
                if file_pattern.search(line):
                    file_header = line
                    display_file = True
                elif diff_pattern.search(line):
                    if display_revno:
                        writerevno("=== revno:%s ===" % (revno,))
                        display_revno = False
                    if display_file:
                        writefileheader(
                            "  %s" % (file_header.decode(file_encoding, 'replace'),))
                        display_file = False
                    line = line.decode(file_encoding, 'replace')
                    writeline("    %s" % (line,))
316
def versioned_grep(opts):
    """Grep versioned files in the revision range given by opts.revision."""
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            # fall back to the first revision when the spec yields nothing
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revison
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
386
def workingtree_grep(opts):
    """Grep files in the current working tree for opts.pattern."""
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
409
def _skip_file(include, exclude, path):
410
if include and not _path_in_glob_list(path, include):
412
if exclude and _path_in_glob_list(path, exclude):
417
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep all versioned files under a directory of the tree.

    For a historical tree (revno not None) file texts are fetched in bulk
    via iter_files_bytes; for the working tree files are read from disk.
    """
    # setup relpath to open files relative to cwd
    rpath = relpath
    if relpath:
        rpath = osutils.pathjoin('..', relpath)

    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching recursively from root
        from_dir = None

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, entry in tree.list_files(
            include_root=False, from_dir=from_dir,
            recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(tree_path, fid)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((tree_path, (fp, tree_path)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read the
                    # entire file
                    with open(path_for_file, 'rb', buffering=4096) as file:
                        _file_grep_list_only_wtree(file, fp, opts, path_prefix)
                else:
                    with open(path_for_file, 'rb') as f:
                        _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            _file_grep(b''.join(chunks), path, opts, revno, path_prefix,
                       tree.get_file_revision(tree_path))
477
def _make_display_path(relpath, path):
478
"""Return path string relative to user cwd.
480
Take tree's 'relpath' and user supplied 'path', and return path
481
that can be displayed to the user.
484
# update path so to display it w.r.t cwd
485
# handle windows slash separator
486
path = osutils.normpath(osutils.pathjoin(relpath, path))
487
path = path.replace('\\', '/')
488
path = path.replace(relpath + '/', '', 1)
492
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno,
                        path_prefix=None):
    """Fetch a versioned file's text and pass it on to _file_grep.

    :param tree: revision tree containing the file.
    :param tree_path: path of the file within the tree.
    :param relpath: tree-relative prefix used to build the display path.
    :param path: user-supplied path, used for display.
    """
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(tree_path)
    _file_grep(file_text, path, opts, revno, path_prefix)
501
def _path_in_glob_list(path, glob_list):
502
for glob in glob_list:
503
if fnmatch(path, glob):
508
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
    """List-only grep of one working-tree file (for -l / -L options).

    Stops reading at the first match instead of scanning the whole file.
    """
    # test and skip binary files
    if b'\x00' in file.read(1024):
        if opts.verbose:
            trace.warning("Binary file '%s' skipped.", path)
        return

    file.seek(0)  # search from beginning

    found = False
    if opts.fixed_string:
        pattern = opts.pattern.encode(_user_encoding, 'replace')
        for line in file:
            if pattern in line:
                found = True
                break
    else:  # not fixed_string
        for line in file:
            if opts.patternc.search(line):
                found = True
                break

    if (opts.files_with_matches and found) or \
            (opts.files_without_match and not found):
        if path_prefix and path_prefix != '.':
            # user has passed a dir arg, show that as result prefix
            path = osutils.pathjoin(path_prefix, path)
        opts.outputter.get_writer(path, None, None)()
538
class _Outputter(object):
539
"""Precalculate formatting based on options given
541
The idea here is to do this work only once per run, and finally return a
542
function that will do the minimum amount possible for each match.
545
def __init__(self, opts, use_cache=False):
546
self.outf = opts.outf
548
# self.cache is used to cache results for dir grep based on fid.
549
# If the fid is does not change between results, it means that
550
# the result will be the same apart from revno. In such a case
551
# we avoid getting file chunks from repo and grepping. The result
552
# is just printed by replacing old revno with new one.
556
no_line = opts.files_with_matches or opts.files_without_match
560
self.get_writer = self._get_writer_plain
561
elif opts.fixed_string:
562
self._old = opts.pattern
563
self._new = color_string(opts.pattern, FG.BOLD_RED)
564
self.get_writer = self._get_writer_fixed_highlighted
566
flags = opts.patternc.flags
567
self._sub = re.compile(
568
opts.pattern.join(("((?:", ")+)")), flags).sub
569
self._highlight = color_string("\\1", FG.BOLD_RED)
570
self.get_writer = self._get_writer_regexp_highlighted
571
path_start = FG.MAGENTA
573
sep = color_string(':', FG.BOLD_CYAN)
574
rev_sep = color_string('~', FG.BOLD_YELLOW)
576
self.get_writer = self._get_writer_plain
577
path_start = path_end = ""
581
parts = [path_start, "%(path)s"]
583
parts.extend([rev_sep, "%(revno)s"])
584
self._format_initial = "".join(parts)
587
if not opts.print_revno:
588
parts.append(path_end)
591
parts.extend([sep, "%(lineno)s"])
592
parts.extend([sep, "%(line)s"])
593
parts.append(opts.eol_marker)
594
self._format_perline = "".join(parts)
596
def _get_writer_plain(self, path, revno, cache_id):
597
"""Get function for writing uncoloured output"""
598
per_line = self._format_perline
599
start = self._format_initial % {"path": path, "revno": revno}
600
write = self.outf.write
601
if self.cache is not None and cache_id is not None:
603
self.cache[cache_id] = path, result_list
604
add_to_cache = result_list.append
606
def _line_cache_and_writer(**kwargs):
607
"""Write formatted line and cache arguments"""
608
end = per_line % kwargs
611
return _line_cache_and_writer
613
def _line_writer(**kwargs):
614
"""Write formatted line from arguments given by underlying opts"""
615
write(start + per_line % kwargs)
618
def write_cached_lines(self, cache_id, revno):
619
"""Write cached results out again for new revision"""
620
cached_path, cached_matches = self.cache[cache_id]
621
start = self._format_initial % {"path": cached_path, "revno": revno}
622
write = self.outf.write
623
for end in cached_matches:
626
def _get_writer_regexp_highlighted(self, path, revno, cache_id):
627
"""Get function for writing output with regexp match highlighted"""
628
_line_writer = self._get_writer_plain(path, revno, cache_id)
629
sub, highlight = self._sub, self._highlight
631
def _line_writer_regexp_highlighted(line, **kwargs):
632
"""Write formatted line with matched pattern highlighted"""
633
return _line_writer(line=sub(highlight, line), **kwargs)
634
return _line_writer_regexp_highlighted
636
def _get_writer_fixed_highlighted(self, path, revno, cache_id):
637
"""Get function for writing output with search string highlighted"""
638
_line_writer = self._get_writer_plain(path, revno, cache_id)
639
old, new = self._old, self._new
641
def _line_writer_fixed_highlighted(line, **kwargs):
642
"""Write formatted line with string searched for highlighted"""
643
return _line_writer(line=line.replace(old, new), **kwargs)
644
return _line_writer_fixed_highlighted
647
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
648
# test and skip binary files
649
if b'\x00' in file_text[:1024]:
651
trace.warning("Binary file '%s' skipped.", path)
654
if path_prefix and path_prefix != '.':
655
# user has passed a dir arg, show that as result prefix
656
path = osutils.pathjoin(path_prefix, path)
658
# GZ 2010-06-07: There's no actual guarentee the file contents will be in
659
# the user encoding, but we have to guess something and it
660
# is a reasonable default without a better mechanism.
661
file_encoding = _user_encoding
662
pattern = opts.pattern.encode(_user_encoding, 'replace')
664
writeline = opts.outputter.get_writer(path, revno, cache_id)
666
if opts.files_with_matches or opts.files_without_match:
667
if opts.fixed_string:
668
found = pattern in file_text
670
search = opts.patternc.search
671
if b"$" not in pattern:
672
found = search(file_text) is not None
674
for line in file_text.splitlines():
680
if (opts.files_with_matches and found) or \
681
(opts.files_without_match and not found):
683
elif opts.fixed_string:
684
# Fast path for no match, search through the entire file at once rather
685
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
686
i = file_text.find(pattern)
689
b = file_text.rfind(b"\n", 0, i) + 1
691
start = file_text.count(b"\n", 0, b) + 1
692
file_text = file_text[b:]
694
for index, line in enumerate(file_text.splitlines()):
696
line = line.decode(file_encoding, 'replace')
697
writeline(lineno=index + start, line=line)
699
for line in file_text.splitlines():
701
line = line.decode(file_encoding, 'replace')
704
# Fast path on no match, the re module avoids bad behaviour in most
705
# standard cases, but perhaps could try and detect backtracking
706
# patterns here and avoid whole text search in those cases
707
search = opts.patternc.search
708
if b"$" not in pattern:
709
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
710
# through revisions as bazaar returns binary mode
711
# and trailing \r breaks $ as line ending match
712
m = search(file_text)
715
b = file_text.rfind(b"\n", 0, m.start()) + 1
717
start = file_text.count(b"\n", 0, b) + 1
718
file_text = file_text[b:]
722
for index, line in enumerate(file_text.splitlines()):
724
line = line.decode(file_encoding, 'replace')
725
writeline(lineno=index + start, line=line)
727
for line in file_text.splitlines():
729
line = line.decode(file_encoding, 'replace')