1
# Copyright (C) 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from io import BytesIO
20
from .lazy_import import lazy_import
21
lazy_import(globals(), """
22
from fnmatch import fnmatch
24
from breezy._termcolor import color_string, FG
34
revision as _mod_revision,
37
from .revisionspec import (
43
_user_encoding = osutils.get_user_encoding()
46
class _RevisionNotLinear(Exception):
47
"""Raised when a revision is not on left-hand history."""
50
class GrepOptions(object):
51
"""Container to pass around grep options.
53
This class is used as a container to pass around user option and
54
some other params (like outf) to processing functions. This makes
55
it easier to add more options as grep evolves.
70
files_with_matches = False
71
files_without_match = False
86
def _rev_on_mainline(rev_tuple):
87
"""returns True is rev tuple is on mainline"""
88
if len(rev_tuple) == 1:
90
return rev_tuple[1] == 0 and rev_tuple[2] == 0
93
# NOTE: _linear_view_revisions is based on
94
# breezy.log._linear_view_revisions.
95
# This should probably be a common public API
96
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Walk the left-hand ancestry from end_rev_id back to start_rev_id.

    Requires that start is older than end.

    :param branch: the branch
    :param start_rev_id: the revision-id at which the walk stops; it is the
        last one yielded
    :param end_rev_id: the revision-id the walk begins at
    :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples;
        merge_depth is always 0 here.
    """
    graph = branch.repository.get_graph()
    stop_keys = (_mod_revision.NULL_REVISION, )
    for revision_id in graph.iter_lefthand_ancestry(end_rev_id, stop_keys):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        yield revision_id, '.'.join(str(n) for n in revno), 0
        if revision_id == start_rev_id:
            # The lower bound has been emitted exactly once; stop here
            # instead of walking on towards the null revision.
            break
110
# NOTE: _graph_view_revisions is copied from
111
# breezy.log._graph_view_revisions.
112
# This should probably be a common public API
113
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
114
rebase_initial_depths=True):
115
"""Calculate revisions to view including merges, newest to oldest.
117
:param branch: the branch
118
:param start_rev_id: the lower revision-id
119
:param end_rev_id: the upper revision-id
120
:param rebase_initial_depth: should depths be rebased until a mainline
122
:return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
124
# requires that start is older than end
125
view_revisions = branch.iter_merge_sorted_revisions(
126
start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
127
stop_rule="with-merges")
128
if not rebase_initial_depths:
129
for (rev_id, merge_depth, revno, end_of_merge
131
yield rev_id, '.'.join(map(str, revno)), merge_depth
133
# We're following a development line starting at a merged revision.
134
# We need to adjust depths down by the initial depth until we find
135
# a depth less than it. Then we use that depth as the adjustment.
136
# If and when we reach the mainline, depth adjustment ends.
137
depth_adjustment = None
138
for (rev_id, merge_depth, revno, end_of_merge
140
if depth_adjustment is None:
141
depth_adjustment = merge_depth
143
if merge_depth < depth_adjustment:
144
# From now on we reduce the depth adjustement, this can be
145
# surprising for users. The alternative requires two passes
146
# which breaks the fast display of the first revision
148
depth_adjustment = merge_depth
149
merge_depth -= depth_adjustment
150
yield rev_id, '.'.join(map(str, revno)), merge_depth
153
def compile_pattern(pattern, flags=0):
    """Compile pattern into a regular expression object.

    :param pattern: the regular expression source
    :param flags: ``re`` flags handed straight to ``re.compile``
    :return: the compiled pattern object
    :raises errors.BzrError: if pattern is not a valid regular expression
    """
    try:
        return re.compile(pattern, flags)
    except re.error as e:
        # Chain the original error so the underlying parse failure is not
        # lost behind the user-facing message.
        raise errors.BzrError("Invalid pattern: '%s'" % pattern) from e
161
def is_fixed_string(s):
    """Tell whether s can be searched for as a plain string.

    :param s: the pattern text to inspect
    :return: True when s is made up solely of ASCII letters, digits,
        underscores and whitespace (the empty string qualifies), else False.
    """
    return re.match("^([A-Za-z0-9_]|\\s)*$", s) is not None
167
class _GrepDiffOutputter(object):
168
"""Precalculate formatting based on options given for diff grep.
171
def __init__(self, opts):
173
self.outf = opts.outf
175
if opts.fixed_string:
176
self._old = opts.pattern
177
self._new = color_string(opts.pattern, FG.BOLD_RED)
178
self.get_writer = self._get_writer_fixed_highlighted
180
flags = opts.patternc.flags
181
self._sub = re.compile(
182
opts.pattern.join(("((?:", ")+)")), flags).sub
183
self._highlight = color_string("\\1", FG.BOLD_RED)
184
self.get_writer = self._get_writer_regexp_highlighted
186
self.get_writer = self._get_writer_plain
188
def get_file_header_writer(self):
189
"""Get function for writing file headers"""
190
write = self.outf.write
191
eol_marker = self.opts.eol_marker
193
def _line_writer(line):
194
write(line + eol_marker)
196
def _line_writer_color(line):
197
write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
198
if self.opts.show_color:
199
return _line_writer_color
204
def get_revision_header_writer(self):
205
"""Get function for writing revno lines"""
206
write = self.outf.write
207
eol_marker = self.opts.eol_marker
209
def _line_writer(line):
210
write(line + eol_marker)
212
def _line_writer_color(line):
213
write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
214
if self.opts.show_color:
215
return _line_writer_color
220
def _get_writer_plain(self):
221
"""Get function for writing uncoloured output"""
222
write = self.outf.write
223
eol_marker = self.opts.eol_marker
225
def _line_writer(line):
226
write(line + eol_marker)
229
def _get_writer_regexp_highlighted(self):
230
"""Get function for writing output with regexp match highlighted"""
231
_line_writer = self._get_writer_plain()
232
sub, highlight = self._sub, self._highlight
234
def _line_writer_regexp_highlighted(line):
235
"""Write formatted line with matched pattern highlighted"""
236
return _line_writer(line=sub(highlight, line))
237
return _line_writer_regexp_highlighted
239
def _get_writer_fixed_highlighted(self):
240
"""Get function for writing output with search string highlighted"""
241
_line_writer = self._get_writer_plain()
242
old, new = self._old, self._new
244
def _line_writer_fixed_highlighted(line):
245
"""Write formatted line with string searched for highlighted"""
246
return _line_writer(line=line.replace(old, new))
247
return _line_writer_fixed_highlighted
251
wt, branch, relpath = \
252
controldir.ControlDir.open_containing_tree_or_branch('.')
253
with branch.lock_read():
255
start_rev = opts.revision[0]
257
# if no revision is specified for diff grep we grep all changesets.
258
opts.revision = [RevisionSpec.from_string('revno:1'),
259
RevisionSpec.from_string('last:1')]
260
start_rev = opts.revision[0]
261
start_revid = start_rev.as_revision_id(branch)
262
if start_revid == b'null:':
264
srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
265
if len(opts.revision) == 2:
266
end_rev = opts.revision[1]
267
end_revid = end_rev.as_revision_id(branch)
268
if end_revid is None:
269
end_revno, end_revid = branch.last_revision_info()
270
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
272
grep_mainline = (_rev_on_mainline(srevno_tuple)
273
and _rev_on_mainline(erevno_tuple))
275
# ensure that we go in reverse order
276
if srevno_tuple > erevno_tuple:
277
srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
278
start_revid, end_revid = end_revid, start_revid
280
# Optimization: Traversing the mainline in reverse order is much
281
# faster when we don't want to look at merged revs. We try this
282
# with _linear_view_revisions. If all revs are to be grepped we
283
# use the slower _graph_view_revisions
284
if opts.levels == 1 and grep_mainline:
285
given_revs = _linear_view_revisions(
286
branch, start_revid, end_revid)
288
given_revs = _graph_view_revisions(
289
branch, start_revid, end_revid)
291
# We do an optimization below. For grepping a specific revision
292
# We don't need to call _graph_view_revisions which is slow.
293
# We create the start_rev_tuple for only that specific revision.
294
# _graph_view_revisions is used only for revision range.
295
start_revno = '.'.join(map(str, srevno_tuple))
296
start_rev_tuple = (start_revid, start_revno, 0)
297
given_revs = [start_rev_tuple]
298
repo = branch.repository
299
diff_pattern = re.compile(
300
b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
301
file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
302
outputter = _GrepDiffOutputter(opts)
303
writeline = outputter.get_writer()
304
writerevno = outputter.get_revision_header_writer()
305
writefileheader = outputter.get_file_header_writer()
306
file_encoding = _user_encoding
307
for revid, revno, merge_depth in given_revs:
308
if opts.levels == 1 and merge_depth != 0:
309
# with level=1 show only top level
312
rev_spec = RevisionSpec_revid.from_string(
313
"revid:" + revid.decode('utf-8'))
314
new_rev = repo.get_revision(revid)
315
new_tree = rev_spec.as_tree(branch)
316
if len(new_rev.parent_ids) == 0:
317
ancestor_id = _mod_revision.NULL_REVISION
319
ancestor_id = new_rev.parent_ids[0]
320
old_tree = repo.revision_tree(ancestor_id)
322
diff.show_diff_trees(old_tree, new_tree, s,
323
old_label='', new_label='')
328
for line in text.splitlines():
329
if file_pattern.search(line):
332
elif diff_pattern.search(line):
334
writerevno("=== revno:%s ===" % (revno,))
335
display_revno = False
338
" %s" % (file_header.decode(file_encoding, 'replace'),))
340
line = line.decode(file_encoding, 'replace')
341
writeline(" %s" % (line,))
344
def versioned_grep(opts):
    """Grep versioned files in one revision or across a revision range.

    Opens the tree or branch containing the current directory, turns
    opts.revision into a sequence of (revision_id, revno, merge_depth)
    entries, and greps every entry of opts.path_list in each revision.

    :param opts: options container. Reads revision, levels and path_list;
        an _Outputter created with use_cache=True is stored on
        opts.outputter.
    """
    _wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                _, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if osutils.isdir(path):
                    # The directory the user named becomes the prefix shown
                    # in front of every result found beneath it.
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
414
def workingtree_grep(opts):
    """Grep the entries of opts.path_list in the working tree.

    :param opts: options container. print_revno is reset to None, an
        _Outputter is stored on opts.outputter, and path_list is read.
    :raises errors.CommandError: if no working tree is found for the
        current directory
    """
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, _branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.CommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                # The directory the user named becomes the prefix shown in
                # front of every result found beneath it.
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
437
def _skip_file(include, exclude, path):
    """Decide whether path is filtered out by the include/exclude globs.

    :param include: glob patterns; when non-empty, path must match one
    :param exclude: glob patterns; when non-empty, path must match none
    :param path: the path to test
    :return: True if the file should be skipped, False otherwise
    """
    if include and not _path_in_glob_list(path, include):
        return True
    if exclude and _path_in_glob_list(path, exclude):
        return True
    return False
445
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
446
# setup relpath to open files relative to cwd
449
rpath = osutils.pathjoin('..', relpath)
451
from_dir = osutils.pathjoin(relpath, path)
453
# start searching recursively from root
458
to_grep_append = to_grep.append
459
# GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
460
# and hits manually refilled. Could do this again if it was
461
# for a good reason, otherwise cache might want purging.
462
outputter = opts.outputter
463
for fp, fc, fkind, entry in tree.list_files(
464
include_root=False, from_dir=from_dir, recursive=opts.recursive):
466
if _skip_file(opts.include, opts.exclude, fp):
469
if fc == 'V' and fkind == 'file':
470
tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
471
if revno is not None:
472
# If old result is valid, print results immediately.
473
# Otherwise, add file info to to_grep so that the
474
# loop later will get chunks and grep them
475
cache_id = tree.get_file_revision(tree_path)
476
if cache_id in outputter.cache:
477
# GZ 2010-06-05: Not really sure caching and re-outputting
478
# the old path is really the right thing,
479
# but it's what the old code seemed to do
480
outputter.write_cached_lines(cache_id, revno)
482
to_grep_append((tree_path, (fp, tree_path)))
484
# we are grepping working tree.
488
path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
489
if opts.files_with_matches or opts.files_without_match:
490
# Optimize for wtree list-only as we don't need to read the
492
with open(path_for_file, 'rb', buffering=4096) as file:
493
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
495
with open(path_for_file, 'rb') as f:
496
_file_grep(f.read(), fp, opts, revno, path_prefix)
498
if revno is not None: # grep versioned files
499
for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
500
path = _make_display_path(relpath, path)
501
_file_grep(b''.join(chunks), path, opts, revno, path_prefix,
502
tree.get_file_revision(tree_path))
505
def _make_display_path(relpath, path):
    """Return path string relative to user cwd.

    Take tree's 'relpath' and user supplied 'path', and return path
    that can be displayed to the user.

    :param relpath: the 'relpath' of the tree; when empty, path is returned
        unchanged
    :param path: the path as supplied by the user
    :return: the path to display
    """
    if not relpath:
        return path
    # update path so to display it w.r.t cwd
    # handle windows slash separator
    shown = osutils.normpath(osutils.pathjoin(relpath, path))
    shown = shown.replace('\\', '/')
    # Only the first occurrence of the prefix is stripped.
    return shown.replace(relpath + '/', '', 1)
520
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
    """Grep the text of a single versioned file.

    Builds the display path from relpath and path, fetches the text stored
    at tree_path in tree, and passes both on to _file_grep.
    """
    display_path = _make_display_path(relpath, path)
    _file_grep(tree.get_file_text(tree_path), display_path, opts, revno,
               path_prefix)
529
def _path_in_glob_list(path, glob_list):
530
for glob in glob_list:
531
if fnmatch(path, glob):
536
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
537
# test and skip binary files
538
if b'\x00' in file.read(1024):
540
trace.warning("Binary file '%s' skipped.", path)
543
file.seek(0) # search from beginning
546
if opts.fixed_string:
547
pattern = opts.pattern.encode(_user_encoding, 'replace')
552
else: # not fixed_string
554
if opts.patternc.search(line):
558
if (opts.files_with_matches and found) or \
559
(opts.files_without_match and not found):
560
if path_prefix and path_prefix != '.':
561
# user has passed a dir arg, show that as result prefix
562
path = osutils.pathjoin(path_prefix, path)
563
opts.outputter.get_writer(path, None, None)()
566
class _Outputter(object):
567
"""Precalculate formatting based on options given
569
The idea here is to do this work only once per run, and finally return a
570
function that will do the minimum amount possible for each match.
573
def __init__(self, opts, use_cache=False):
574
self.outf = opts.outf
576
# self.cache is used to cache results for dir grep based on fid.
577
# If the fid does not change between results, it means that
578
# the result will be the same apart from revno. In such a case
579
# we avoid getting file chunks from repo and grepping. The result
580
# is just printed by replacing old revno with new one.
584
no_line = opts.files_with_matches or opts.files_without_match
588
self.get_writer = self._get_writer_plain
589
elif opts.fixed_string:
590
self._old = opts.pattern
591
self._new = color_string(opts.pattern, FG.BOLD_RED)
592
self.get_writer = self._get_writer_fixed_highlighted
594
flags = opts.patternc.flags
595
self._sub = re.compile(
596
opts.pattern.join(("((?:", ")+)")), flags).sub
597
self._highlight = color_string("\\1", FG.BOLD_RED)
598
self.get_writer = self._get_writer_regexp_highlighted
599
path_start = FG.MAGENTA
601
sep = color_string(':', FG.BOLD_CYAN)
602
rev_sep = color_string('~', FG.BOLD_YELLOW)
604
self.get_writer = self._get_writer_plain
605
path_start = path_end = ""
609
parts = [path_start, "%(path)s"]
611
parts.extend([rev_sep, "%(revno)s"])
612
self._format_initial = "".join(parts)
615
if not opts.print_revno:
616
parts.append(path_end)
619
parts.extend([sep, "%(lineno)s"])
620
parts.extend([sep, "%(line)s"])
621
parts.append(opts.eol_marker)
622
self._format_perline = "".join(parts)
624
def _get_writer_plain(self, path, revno, cache_id):
625
"""Get function for writing uncoloured output"""
626
per_line = self._format_perline
627
start = self._format_initial % {"path": path, "revno": revno}
628
write = self.outf.write
629
if self.cache is not None and cache_id is not None:
631
self.cache[cache_id] = path, result_list
632
add_to_cache = result_list.append
634
def _line_cache_and_writer(**kwargs):
635
"""Write formatted line and cache arguments"""
636
end = per_line % kwargs
639
return _line_cache_and_writer
641
def _line_writer(**kwargs):
642
"""Write formatted line from arguments given by underlying opts"""
643
write(start + per_line % kwargs)
646
def write_cached_lines(self, cache_id, revno):
647
"""Write cached results out again for new revision"""
648
cached_path, cached_matches = self.cache[cache_id]
649
start = self._format_initial % {"path": cached_path, "revno": revno}
650
write = self.outf.write
651
for end in cached_matches:
654
def _get_writer_regexp_highlighted(self, path, revno, cache_id):
655
"""Get function for writing output with regexp match highlighted"""
656
_line_writer = self._get_writer_plain(path, revno, cache_id)
657
sub, highlight = self._sub, self._highlight
659
def _line_writer_regexp_highlighted(line, **kwargs):
660
"""Write formatted line with matched pattern highlighted"""
661
return _line_writer(line=sub(highlight, line), **kwargs)
662
return _line_writer_regexp_highlighted
664
def _get_writer_fixed_highlighted(self, path, revno, cache_id):
665
"""Get function for writing output with search string highlighted"""
666
_line_writer = self._get_writer_plain(path, revno, cache_id)
667
old, new = self._old, self._new
669
def _line_writer_fixed_highlighted(line, **kwargs):
670
"""Write formatted line with string searched for highlighted"""
671
return _line_writer(line=line.replace(old, new), **kwargs)
672
return _line_writer_fixed_highlighted
675
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
676
# test and skip binary files
677
if b'\x00' in file_text[:1024]:
679
trace.warning("Binary file '%s' skipped.", path)
682
if path_prefix and path_prefix != '.':
683
# user has passed a dir arg, show that as result prefix
684
path = osutils.pathjoin(path_prefix, path)
686
# GZ 2010-06-07: There's no actual guarantee the file contents will be in
687
# the user encoding, but we have to guess something and it
688
# is a reasonable default without a better mechanism.
689
file_encoding = _user_encoding
690
pattern = opts.pattern.encode(_user_encoding, 'replace')
692
writeline = opts.outputter.get_writer(path, revno, cache_id)
694
if opts.files_with_matches or opts.files_without_match:
695
if opts.fixed_string:
696
found = pattern in file_text
698
search = opts.patternc.search
699
if b"$" not in pattern:
700
found = search(file_text) is not None
702
for line in file_text.splitlines():
708
if (opts.files_with_matches and found) or \
709
(opts.files_without_match and not found):
711
elif opts.fixed_string:
712
# Fast path for no match, search through the entire file at once rather
713
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
714
i = file_text.find(pattern)
717
b = file_text.rfind(b"\n", 0, i) + 1
719
start = file_text.count(b"\n", 0, b) + 1
720
file_text = file_text[b:]
722
for index, line in enumerate(file_text.splitlines()):
724
line = line.decode(file_encoding, 'replace')
725
writeline(lineno=index + start, line=line)
727
for line in file_text.splitlines():
729
line = line.decode(file_encoding, 'replace')
732
# Fast path on no match, the re module avoids bad behaviour in most
733
# standard cases, but perhaps could try and detect backtracking
734
# patterns here and avoid whole text search in those cases
735
search = opts.patternc.search
736
if b"$" not in pattern:
737
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
738
# through revisions as bazaar returns binary mode
739
# and trailing \r breaks $ as line ending match
740
m = search(file_text)
743
b = file_text.rfind(b"\n", 0, m.start()) + 1
745
start = file_text.count(b"\n", 0, b) + 1
746
file_text = file_text[b:]
750
for index, line in enumerate(file_text.splitlines()):
752
line = line.decode(file_encoding, 'replace')
753
writeline(lineno=index + start, line=line)
755
for line in file_text.splitlines():
757
line = line.decode(file_encoding, 'replace')