1
# Copyright (C) 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
21
from .lazy_import import lazy_import
22
lazy_import(globals(), """
23
from fnmatch import fnmatch
25
from breezy._termcolor import color_string, FG
35
revision as _mod_revision,
38
from .revisionspec import (
47
# The user's terminal/filesystem encoding, resolved once at module load.
# Used below to encode patterns into bytes and decode file/diff text back.
_user_encoding = osutils.get_user_encoding()
50
class _RevisionNotLinear(Exception):
51
"""Raised when a revision is not on left-hand history."""
54
class GrepOptions(object):
    """Container to pass around grep options.

    This class is used as a container to pass around user option and
    some other params (like outf) to processing functions. This makes
    it easier to add more options as grep evolves.
    """
    # user options (set from command-line flags)
    verbose = False
    ignore_case = False
    no_recursive = False
    from_root = False
    null = False
    levels = None
    line_number = False
    path_list = None
    revision = None
    pattern = None
    include = None
    exclude = None
    fixed_string = False
    files_with_matches = False
    files_without_match = False
    color = None
    diff = False

    # derived options (computed from the user options above)
    recursive = None
    eol_marker = None
    patternc = None
    sub_patternc = None
    print_revno = None
    outf = None
    show_color = False
90
def _rev_on_mainline(rev_tuple):
91
"""returns True is rev tuple is on mainline"""
92
if len(rev_tuple) == 1:
94
return rev_tuple[1] == 0 and rev_tuple[2] == 0
97
# NOTE: _linear_view_revisions is based on
# breezy.log._linear_view_revisions.
# This should probably be a common public API
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield (revision_id, revno_str, merge_depth) newest to oldest.

    Walks the left-hand ancestry from end_rev_id back to start_rev_id
    inclusive, stopping once start_rev_id is reached.  Merge depth is
    always 0 because only left-hand (mainline-style) history is visited.
    """
    # requires that start is older than end
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        if revision_id == start_rev_id:
            # reached the lower bound: emit it and stop walking
            yield revision_id, revno_str, 0
            break
        else:
            yield revision_id, revno_str, 0
114
# NOTE: _graph_view_revisions is copied from
115
# breezy.log._graph_view_revisions.
116
# This should probably be a common public API
117
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
118
rebase_initial_depths=True):
119
"""Calculate revisions to view including merges, newest to oldest.
121
:param branch: the branch
122
:param start_rev_id: the lower revision-id
123
:param end_rev_id: the upper revision-id
124
:param rebase_initial_depth: should depths be rebased until a mainline
126
:return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
128
# requires that start is older than end
129
view_revisions = branch.iter_merge_sorted_revisions(
130
start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
131
stop_rule="with-merges")
132
if not rebase_initial_depths:
133
for (rev_id, merge_depth, revno, end_of_merge
135
yield rev_id, '.'.join(map(str, revno)), merge_depth
137
# We're following a development line starting at a merged revision.
138
# We need to adjust depths down by the initial depth until we find
139
# a depth less than it. Then we use that depth as the adjustment.
140
# If and when we reach the mainline, depth adjustment ends.
141
depth_adjustment = None
142
for (rev_id, merge_depth, revno, end_of_merge
144
if depth_adjustment is None:
145
depth_adjustment = merge_depth
147
if merge_depth < depth_adjustment:
148
# From now on we reduce the depth adjustement, this can be
149
# surprising for users. The alternative requires two passes
150
# which breaks the fast display of the first revision
152
depth_adjustment = merge_depth
153
merge_depth -= depth_adjustment
154
yield rev_id, '.'.join(map(str, revno)), merge_depth
157
def compile_pattern(pattern, flags=0):
    """Compile ``pattern`` into a regex object.

    :param pattern: regular expression string supplied by the user.
    :param flags: optional ``re`` flags (e.g. ``re.IGNORECASE``).
    :raises errors.BzrError: if the pattern is not a valid regex.
    """
    try:
        return re.compile(pattern, flags)
    except re.error:
        # Surface a user-facing error rather than a raw re.error traceback.
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
165
def is_fixed_string(s):
    """Return True if ``s`` contains only word characters and whitespace.

    Such a pattern has no regex metacharacters, so a plain substring
    search can be used instead of the ``re`` machinery.
    """
    return bool(re.match(r"^([A-Za-z0-9_]|\s)*$", s))
171
class _GrepDiffOutputter(object):
172
"""Precalculate formatting based on options given for diff grep.
175
def __init__(self, opts):
177
self.outf = opts.outf
179
if opts.fixed_string:
180
self._old = opts.pattern
181
self._new = color_string(opts.pattern, FG.BOLD_RED)
182
self.get_writer = self._get_writer_fixed_highlighted
184
flags = opts.patternc.flags
185
self._sub = re.compile(
186
opts.pattern.join(("((?:", ")+)")), flags).sub
187
self._highlight = color_string("\\1", FG.BOLD_RED)
188
self.get_writer = self._get_writer_regexp_highlighted
190
self.get_writer = self._get_writer_plain
192
def get_file_header_writer(self):
193
"""Get function for writing file headers"""
194
write = self.outf.write
195
eol_marker = self.opts.eol_marker
197
def _line_writer(line):
198
write(line + eol_marker)
200
def _line_writer_color(line):
201
write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
202
if self.opts.show_color:
203
return _line_writer_color
208
def get_revision_header_writer(self):
209
"""Get function for writing revno lines"""
210
write = self.outf.write
211
eol_marker = self.opts.eol_marker
213
def _line_writer(line):
214
write(line + eol_marker)
216
def _line_writer_color(line):
217
write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
218
if self.opts.show_color:
219
return _line_writer_color
224
def _get_writer_plain(self):
225
"""Get function for writing uncoloured output"""
226
write = self.outf.write
227
eol_marker = self.opts.eol_marker
229
def _line_writer(line):
230
write(line + eol_marker)
233
def _get_writer_regexp_highlighted(self):
234
"""Get function for writing output with regexp match highlighted"""
235
_line_writer = self._get_writer_plain()
236
sub, highlight = self._sub, self._highlight
238
def _line_writer_regexp_highlighted(line):
239
"""Write formatted line with matched pattern highlighted"""
240
return _line_writer(line=sub(highlight, line))
241
return _line_writer_regexp_highlighted
243
def _get_writer_fixed_highlighted(self):
244
"""Get function for writing output with search string highlighted"""
245
_line_writer = self._get_writer_plain()
246
old, new = self._old, self._new
248
def _line_writer_fixed_highlighted(line):
249
"""Write formatted line with string searched for highlighted"""
250
return _line_writer(line=line.replace(old, new))
251
return _line_writer_fixed_highlighted
255
def grep_diff(opts):
    """Grep the diffs of a revision range for opts.pattern.

    For each revision, generates the diff against its first parent and
    writes matching added/removed lines (with lazily-emitted revision and
    file headers) to opts.outf via a _GrepDiffOutputter.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if opts.revision:
            start_rev = opts.revision[0]
        else:
            # if no revision is specified for diff grep we grep all changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
            start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            # empty branch: nothing to grep
            return
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revison
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        # GZ-style caveat: diffs are binary; we guess the user encoding when
        # decoding lines for display.
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if len(new_rev.parent_ids) == 0:
                ancestor_id = _mod_revision.NULL_REVISION
            else:
                ancestor_id = new_rev.parent_ids[0]
            old_tree = repo.revision_tree(ancestor_id)
            s = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, s,
                                 old_label='', new_label='')
            # headers are only printed once a matching diff line is found
            display_revno = True
            display_file = False
            file_header = None
            text = s.getvalue()
            for line in text.splitlines():
                if file_pattern.search(line):
                    file_header = line
                    display_file = True
                elif diff_pattern.search(line):
                    if display_revno:
                        writerevno("=== revno:%s ===" % (revno,))
                        display_revno = False
                    if display_file:
                        writefileheader(
                            "  %s" % (file_header.decode(file_encoding, 'replace'),))
                        display_file = False
                    line = line.decode(file_encoding, 'replace')
                    writeline("    %s" % (line,))
348
def versioned_grep(opts):
    """Grep versioned files in the revision(s) given by opts.revision."""
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            # fall back to the first revision when the spec yields nothing
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revison
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
418
def workingtree_grep(opts):
    """Grep files in the current working tree.

    :raises errors.BzrCommandError: if no working tree is present.
    """
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
441
def _skip_file(include, exclude, path):
442
if include and not _path_in_glob_list(path, include):
444
if exclude and _path_in_glob_list(path, exclude):
449
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep all files under ``path`` in ``tree``.

    When ``revno`` is not None the tree is a revision tree and file chunks
    are fetched in a batch via iter_files_bytes; otherwise files are read
    directly from the working tree on disk.
    """
    # setup relpath to open files relative to cwd
    rpath = relpath
    if relpath:
        rpath = osutils.pathjoin('..', relpath)

    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching recursively from root
        from_dir = None
        recursive = True

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, entry in tree.list_files(
            include_root=False, from_dir=from_dir, recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(tree_path)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((tree_path, (fp, tree_path)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read the
                    # entire file
                    with open(path_for_file, 'rb', buffering=4096) as file:
                        _file_grep_list_only_wtree(file, fp, opts, path_prefix)
                else:
                    with open(path_for_file, 'rb') as f:
                        _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            _file_grep(b''.join(chunks), path, opts, revno, path_prefix,
                       tree.get_file_revision(tree_path))
509
def _make_display_path(relpath, path):
510
"""Return path string relative to user cwd.
512
Take tree's 'relpath' and user supplied 'path', and return path
513
that can be displayed to the user.
516
# update path so to display it w.r.t cwd
517
# handle windows slash separator
518
path = osutils.normpath(osutils.pathjoin(relpath, path))
519
path = path.replace('\\', '/')
520
path = path.replace(relpath + '/', '', 1)
524
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
    """Create a file object for the specified id and pass it on to _file_grep.
    """
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(tree_path)
    _file_grep(file_text, path, opts, revno, path_prefix)
533
def _path_in_glob_list(path, glob_list):
534
for glob in glob_list:
535
if fnmatch(path, glob):
540
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
541
# test and skip binary files
542
if b'\x00' in file.read(1024):
544
trace.warning("Binary file '%s' skipped.", path)
547
file.seek(0) # search from beginning
550
if opts.fixed_string:
551
pattern = opts.pattern.encode(_user_encoding, 'replace')
556
else: # not fixed_string
558
if opts.patternc.search(line):
562
if (opts.files_with_matches and found) or \
563
(opts.files_without_match and not found):
564
if path_prefix and path_prefix != '.':
565
# user has passed a dir arg, show that as result prefix
566
path = osutils.pathjoin(path_prefix, path)
567
opts.outputter.get_writer(path, None, None)()
570
class _Outputter(object):
571
"""Precalculate formatting based on options given
573
The idea here is to do this work only once per run, and finally return a
574
function that will do the minimum amount possible for each match.
577
def __init__(self, opts, use_cache=False):
578
self.outf = opts.outf
580
# self.cache is used to cache results for dir grep based on fid.
581
# If the fid is does not change between results, it means that
582
# the result will be the same apart from revno. In such a case
583
# we avoid getting file chunks from repo and grepping. The result
584
# is just printed by replacing old revno with new one.
588
no_line = opts.files_with_matches or opts.files_without_match
592
self.get_writer = self._get_writer_plain
593
elif opts.fixed_string:
594
self._old = opts.pattern
595
self._new = color_string(opts.pattern, FG.BOLD_RED)
596
self.get_writer = self._get_writer_fixed_highlighted
598
flags = opts.patternc.flags
599
self._sub = re.compile(
600
opts.pattern.join(("((?:", ")+)")), flags).sub
601
self._highlight = color_string("\\1", FG.BOLD_RED)
602
self.get_writer = self._get_writer_regexp_highlighted
603
path_start = FG.MAGENTA
605
sep = color_string(':', FG.BOLD_CYAN)
606
rev_sep = color_string('~', FG.BOLD_YELLOW)
608
self.get_writer = self._get_writer_plain
609
path_start = path_end = ""
613
parts = [path_start, "%(path)s"]
615
parts.extend([rev_sep, "%(revno)s"])
616
self._format_initial = "".join(parts)
619
if not opts.print_revno:
620
parts.append(path_end)
623
parts.extend([sep, "%(lineno)s"])
624
parts.extend([sep, "%(line)s"])
625
parts.append(opts.eol_marker)
626
self._format_perline = "".join(parts)
628
def _get_writer_plain(self, path, revno, cache_id):
629
"""Get function for writing uncoloured output"""
630
per_line = self._format_perline
631
start = self._format_initial % {"path": path, "revno": revno}
632
write = self.outf.write
633
if self.cache is not None and cache_id is not None:
635
self.cache[cache_id] = path, result_list
636
add_to_cache = result_list.append
638
def _line_cache_and_writer(**kwargs):
639
"""Write formatted line and cache arguments"""
640
end = per_line % kwargs
643
return _line_cache_and_writer
645
def _line_writer(**kwargs):
646
"""Write formatted line from arguments given by underlying opts"""
647
write(start + per_line % kwargs)
650
def write_cached_lines(self, cache_id, revno):
651
"""Write cached results out again for new revision"""
652
cached_path, cached_matches = self.cache[cache_id]
653
start = self._format_initial % {"path": cached_path, "revno": revno}
654
write = self.outf.write
655
for end in cached_matches:
658
def _get_writer_regexp_highlighted(self, path, revno, cache_id):
659
"""Get function for writing output with regexp match highlighted"""
660
_line_writer = self._get_writer_plain(path, revno, cache_id)
661
sub, highlight = self._sub, self._highlight
663
def _line_writer_regexp_highlighted(line, **kwargs):
664
"""Write formatted line with matched pattern highlighted"""
665
return _line_writer(line=sub(highlight, line), **kwargs)
666
return _line_writer_regexp_highlighted
668
def _get_writer_fixed_highlighted(self, path, revno, cache_id):
669
"""Get function for writing output with search string highlighted"""
670
_line_writer = self._get_writer_plain(path, revno, cache_id)
671
old, new = self._old, self._new
673
def _line_writer_fixed_highlighted(line, **kwargs):
674
"""Write formatted line with string searched for highlighted"""
675
return _line_writer(line=line.replace(old, new), **kwargs)
676
return _line_writer_fixed_highlighted
679
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
680
# test and skip binary files
681
if b'\x00' in file_text[:1024]:
683
trace.warning("Binary file '%s' skipped.", path)
686
if path_prefix and path_prefix != '.':
687
# user has passed a dir arg, show that as result prefix
688
path = osutils.pathjoin(path_prefix, path)
690
# GZ 2010-06-07: There's no actual guarentee the file contents will be in
691
# the user encoding, but we have to guess something and it
692
# is a reasonable default without a better mechanism.
693
file_encoding = _user_encoding
694
pattern = opts.pattern.encode(_user_encoding, 'replace')
696
writeline = opts.outputter.get_writer(path, revno, cache_id)
698
if opts.files_with_matches or opts.files_without_match:
699
if opts.fixed_string:
700
found = pattern in file_text
702
search = opts.patternc.search
703
if b"$" not in pattern:
704
found = search(file_text) is not None
706
for line in file_text.splitlines():
712
if (opts.files_with_matches and found) or \
713
(opts.files_without_match and not found):
715
elif opts.fixed_string:
716
# Fast path for no match, search through the entire file at once rather
717
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
718
i = file_text.find(pattern)
721
b = file_text.rfind(b"\n", 0, i) + 1
723
start = file_text.count(b"\n", 0, b) + 1
724
file_text = file_text[b:]
726
for index, line in enumerate(file_text.splitlines()):
728
line = line.decode(file_encoding, 'replace')
729
writeline(lineno=index + start, line=line)
731
for line in file_text.splitlines():
733
line = line.decode(file_encoding, 'replace')
736
# Fast path on no match, the re module avoids bad behaviour in most
737
# standard cases, but perhaps could try and detect backtracking
738
# patterns here and avoid whole text search in those cases
739
search = opts.patternc.search
740
if b"$" not in pattern:
741
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
742
# through revisions as bazaar returns binary mode
743
# and trailing \r breaks $ as line ending match
744
m = search(file_text)
747
b = file_text.rfind(b"\n", 0, m.start()) + 1
749
start = file_text.count(b"\n", 0, b) + 1
750
file_text = file_text[b:]
754
for index, line in enumerate(file_text.splitlines()):
756
line = line.decode(file_encoding, 'replace')
757
writeline(lineno=index + start, line=line)
759
for line in file_text.splitlines():
761
line = line.decode(file_encoding, 'replace')