# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import

import re

from io import BytesIO

from .lazy_import import lazy_import
lazy_import(globals(), """
from fnmatch import fnmatch

from breezy._termcolor import color_string, FG

from breezy import (
    controldir,
    diff,
    errors,
    osutils,
    revision as _mod_revision,
    trace,
    )
""")
from .revisionspec import (
    RevisionSpec,
    RevisionSpec_revid,
    RevisionSpec_revno,
    )

_user_encoding = osutils.get_user_encoding()
48
class _RevisionNotLinear(Exception):
49
"""Raised when a revision is not on left-hand history."""
52
class GrepOptions(object):
    """Container to pass around grep options.

    This class is used as a container to pass around user option and
    some other params (like outf) to processing functions. This makes
    it easier to add more options as grep evolves.
    """
    # User-facing option defaults.  The attribute names below are grounded
    # in the `opts.X` accesses made by the processing functions in this
    # module; the command layer overwrites them per invocation.
    verbose = False
    ignore_case = False
    no_recursive = False
    from_root = False
    null = False
    levels = None
    line_number = False
    path_list = None
    revision = None
    pattern = None
    include = None
    exclude = None
    fixed_string = False
    files_with_matches = False
    files_without_match = False
    color = None
    diff = False

    # Derived/processing state smuggled onto the options object by the
    # grep machinery (output stream, compiled pattern, colour flag, etc.).
    eol_marker = None
    print_revno = None
    patternc = None
    sub_patternc = None
    outf = None
    show_color = False
    outputter = None
88
def _rev_on_mainline(rev_tuple):
89
"""returns True is rev tuple is on mainline"""
90
if len(rev_tuple) == 1:
92
return rev_tuple[1] == 0 and rev_tuple[2] == 0
95
# NOTE: _linear_view_revisions is based on
# breezy.log._linear_view_revisions.
# This should probably be a common public API
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield (revision_id, dotted_revno_str, merge_depth), newest to oldest.

    Only left-hand (mainline) ancestry is walked, so merge_depth is
    always 0.  Requires that start_rev_id is older than end_rev_id.
    """
    # requires that start is older than end
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        if revision_id == start_rev_id:
            yield revision_id, revno_str, 0
            break
        yield revision_id, revno_str, 0
112
# NOTE: _graph_view_revisions is copied from
# breezy.log._graph_view_revisions.
# This should probably be a common public API
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
                          rebase_initial_depths=True):
    """Calculate revisions to view including merges, newest to oldest.

    :param branch: the branch
    :param start_rev_id: the lower revision-id
    :param end_rev_id: the upper revision-id
    :param rebase_initial_depths: should depths be rebased until a mainline
        revision is found?
    :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
    """
    # requires that start is older than end
    view_revisions = branch.iter_merge_sorted_revisions(
        start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
        stop_rule="with-merges")
    if not rebase_initial_depths:
        for (rev_id, merge_depth, revno, end_of_merge
             ) in view_revisions:
            yield rev_id, '.'.join(map(str, revno)), merge_depth
    else:
        # We're following a development line starting at a merged revision.
        # We need to adjust depths down by the initial depth until we find
        # a depth less than it. Then we use that depth as the adjustment.
        # If and when we reach the mainline, depth adjustment ends.
        depth_adjustment = None
        for (rev_id, merge_depth, revno, end_of_merge
             ) in view_revisions:
            if depth_adjustment is None:
                depth_adjustment = merge_depth
            if merge_depth < depth_adjustment:
                # From now on we reduce the depth adjustement, this can be
                # surprising for users. The alternative requires two passes
                # which breaks the fast display of the first revision
                # though.
                depth_adjustment = merge_depth
            merge_depth -= depth_adjustment
            yield rev_id, '.'.join(map(str, revno)), merge_depth
155
def compile_pattern(pattern, flags=0):
    """Compile a regular expression, mapping failure to BzrError.

    :param pattern: regular expression string
    :param flags: re module flags
    :raises errors.BzrError: if the pattern is not a valid regexp
    """
    try:
        return re.compile(pattern, flags)
    except re.error:
        # Surface a user-level error instead of the raw re traceback.
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
163
def is_fixed_string(s):
    """Return True if s contains only word characters and whitespace.

    Such a pattern has no regexp metacharacters and can be searched for
    with plain substring matching instead of the re engine.
    """
    return bool(re.match("^([A-Za-z0-9_]|\\s)*$", s))
169
class _GrepDiffOutputter(object):
    """Precalculate formatting based on options given for diff grep.

    Picks the appropriate writer factory once in __init__ so per-line
    output does the minimum amount of work.
    """

    def __init__(self, opts):
        self.opts = opts
        self.outf = opts.outf
        if opts.show_color:
            if opts.fixed_string:
                # Plain substring search: highlight by literal replace.
                self._old = opts.pattern
                self._new = color_string(opts.pattern, FG.BOLD_RED)
                self.get_writer = self._get_writer_fixed_highlighted
            else:
                # Regexp search: wrap every (possibly adjacent) match.
                flags = opts.patternc.flags
                self._sub = re.compile(
                    opts.pattern.join(("((?:", ")+)")), flags).sub
                self._highlight = color_string("\\1", FG.BOLD_RED)
                self.get_writer = self._get_writer_regexp_highlighted
        else:
            self.get_writer = self._get_writer_plain

    def get_file_header_writer(self):
        """Get function for writing file headers"""
        write = self.outf.write
        eol_marker = self.opts.eol_marker

        def _line_writer(line):
            write(line + eol_marker)

        def _line_writer_color(line):
            write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
        if self.opts.show_color:
            return _line_writer_color
        else:
            return _line_writer

    def get_revision_header_writer(self):
        """Get function for writing revno lines"""
        write = self.outf.write
        eol_marker = self.opts.eol_marker

        def _line_writer(line):
            write(line + eol_marker)

        def _line_writer_color(line):
            write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
        if self.opts.show_color:
            return _line_writer_color
        else:
            return _line_writer

    def _get_writer_plain(self):
        """Get function for writing uncoloured output"""
        write = self.outf.write
        eol_marker = self.opts.eol_marker

        def _line_writer(line):
            write(line + eol_marker)
        return _line_writer

    def _get_writer_regexp_highlighted(self):
        """Get function for writing output with regexp match highlighted"""
        _line_writer = self._get_writer_plain()
        sub, highlight = self._sub, self._highlight

        def _line_writer_regexp_highlighted(line):
            """Write formatted line with matched pattern highlighted"""
            return _line_writer(line=sub(highlight, line))
        return _line_writer_regexp_highlighted

    def _get_writer_fixed_highlighted(self):
        """Get function for writing output with search string highlighted"""
        _line_writer = self._get_writer_plain()
        old, new = self._old, self._new

        def _line_writer_fixed_highlighted(line):
            """Write formatted line with string searched for highlighted"""
            return _line_writer(line=line.replace(old, new))
        return _line_writer_fixed_highlighted
253
def grep_diff(opts):
    """Grep the diffs (changesets) of a branch's history for opts.pattern.

    Walks the revisions selected by opts.revision (all of history when no
    revision is given), renders each revision's diff against its first
    parent, and prints diff lines matching the pattern together with
    revision and file headers.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if opts.revision:
            start_rev = opts.revision[0]
        else:
            # if no revision is specified for diff grep we grep all changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
            start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            return
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revison
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        # NOTE(review): assumes diff bytes are in the user encoding — the
        # decode below is a best-effort guess, 'replace' avoids crashes.
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if len(new_rev.parent_ids) == 0:
                ancestor_id = _mod_revision.NULL_REVISION
            else:
                ancestor_id = new_rev.parent_ids[0]
            old_tree = repo.revision_tree(ancestor_id)
            s = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, s,
                                 old_label='', new_label='')
            display_revno = True
            display_file = False
            file_header = None
            text = s.getvalue()
            for line in text.splitlines():
                if file_pattern.search(line):
                    file_header = line
                    display_file = True
                elif diff_pattern.search(line):
                    # Emit each header at most once, lazily, so only
                    # revisions/files with matches produce headers.
                    if display_revno:
                        writerevno("=== revno:%s ===" % (revno,))
                        display_revno = False
                    if display_file:
                        writefileheader(
                            "  %s" % (file_header.decode(file_encoding,
                                                         'replace'),))
                        display_file = False
                    line = line.decode(file_encoding, 'replace')
                    writeline("  %s" % (line,))
346
def versioned_grep(opts):
    """Grep versioned file content over the revision range in opts.revision."""
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revison
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
416
def workingtree_grep(opts):
    """Grep files in the working tree (unversioned search, no revnos)."""
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
439
def _skip_file(include, exclude, path):
440
if include and not _path_in_glob_list(path, include):
442
if exclude and _path_in_glob_list(path, exclude):
447
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Recursively grep versioned files under a directory.

    When revno is None the working tree files are read from disk;
    otherwise file chunks are fetched from the repository (batched via
    iter_files_bytes, with a per-file-revision output cache).
    """
    # setup relpath to open files relative to cwd
    rpath = relpath
    if relpath:
        rpath = osutils.pathjoin('..', relpath)

    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching recursively from root
        from_dir = None

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, entry in tree.list_files(
            include_root=False, from_dir=from_dir,
            recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(tree_path)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((tree_path, (fp, tree_path)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read
                    # the entire file
                    with open(path_for_file, 'rb', buffering=4096) as file:
                        _file_grep_list_only_wtree(file, fp, opts,
                                                   path_prefix)
                else:
                    with open(path_for_file, 'rb') as f:
                        _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            _file_grep(b''.join(chunks), path, opts, revno, path_prefix,
                       tree.get_file_revision(tree_path))
507
def _make_display_path(relpath, path):
508
"""Return path string relative to user cwd.
510
Take tree's 'relpath' and user supplied 'path', and return path
511
that can be displayed to the user.
514
# update path so to display it w.r.t cwd
515
# handle windows slash separator
516
path = osutils.normpath(osutils.pathjoin(relpath, path))
517
path = path.replace('\\', '/')
518
path = path.replace(relpath + '/', '', 1)
522
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno,
                        path_prefix=None):
    """Fetch a versioned file's text and pass it on to _file_grep.

    :param tree: revision tree containing the file
    :param tree_path: path of the file within the tree
    :param relpath: tree-relative prefix used to build the display path
    :param path: user-supplied path for display
    """
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(tree_path)
    _file_grep(file_text, path, opts, revno, path_prefix)
531
def _path_in_glob_list(path, glob_list):
532
for glob in glob_list:
533
if fnmatch(path, glob):
538
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
    """List-only (-l / -L) grep of one working-tree file object.

    Reads the open binary file line by line, stopping at the first match,
    and prints just the path when the found/not-found state matches the
    files_with_matches / files_without_match option.
    """
    # test and skip binary files
    if b'\x00' in file.read(1024):
        if opts.verbose:
            trace.warning("Binary file '%s' skipped.", path)
        return

    file.seek(0)  # search from beginning

    found = False
    if opts.fixed_string:
        pattern = opts.pattern.encode(_user_encoding, 'replace')
        for line in file:
            if pattern in line:
                found = True
                break
    else:  # not fixed_string
        for line in file:
            if opts.patternc.search(line):
                found = True
                break

    if (opts.files_with_matches and found) or \
            (opts.files_without_match and not found):
        if path_prefix and path_prefix != '.':
            # user has passed a dir arg, show that as result prefix
            path = osutils.pathjoin(path_prefix, path)
        opts.outputter.get_writer(path, None, None)()
568
class _Outputter(object):
    """Precalculate formatting based on options given

    The idea here is to do this work only once per run, and finally return a
    function that will do the minimum amount possible for each match.
    """

    def __init__(self, opts, use_cache=False):
        self.outf = opts.outf
        if use_cache:
            # self.cache is used to cache results for dir grep based on fid.
            # If the fid is does not change between results, it means that
            # the result will be the same apart from revno. In such a case
            # we avoid getting file chunks from repo and grepping. The result
            # is just printed by replacing old revno with new one.
            self.cache = {}
        else:
            self.cache = None
        no_line = opts.files_with_matches or opts.files_without_match

        if opts.show_color:
            if no_line:
                self.get_writer = self._get_writer_plain
            elif opts.fixed_string:
                self._old = opts.pattern
                self._new = color_string(opts.pattern, FG.BOLD_RED)
                self.get_writer = self._get_writer_fixed_highlighted
            else:
                flags = opts.patternc.flags
                self._sub = re.compile(
                    opts.pattern.join(("((?:", ")+)")), flags).sub
                self._highlight = color_string("\\1", FG.BOLD_RED)
                self.get_writer = self._get_writer_regexp_highlighted
            path_start = FG.MAGENTA
            path_end = FG.NONE
            sep = color_string(':', FG.BOLD_CYAN)
            rev_sep = color_string('~', FG.BOLD_YELLOW)
        else:
            self.get_writer = self._get_writer_plain
            path_start = path_end = ""
            sep = ":"
            rev_sep = "~"

        # Precompute "path[~revno]" prefix and per-match suffix templates.
        # NOTE(review): the assembly below is reconstructed — confirm the
        # exact placement of path_end against upstream before relying on
        # coloured output details.
        parts = [path_start, "%(path)s"]
        if opts.print_revno:
            parts.extend([rev_sep, "%(revno)s"])
        self._format_initial = "".join(parts)
        parts = []
        if no_line:
            if not opts.print_revno:
                parts.append(path_end)
        else:
            if opts.line_number:
                parts.extend([sep, "%(lineno)s"])
            parts.extend([sep, "%(line)s"])
        parts.append(opts.eol_marker)
        self._format_perline = "".join(parts)

    def _get_writer_plain(self, path, revno, cache_id):
        """Get function for writing uncoloured output"""
        per_line = self._format_perline
        start = self._format_initial % {"path": path, "revno": revno}
        write = self.outf.write
        if self.cache is not None and cache_id is not None:
            result_list = []
            self.cache[cache_id] = path, result_list
            add_to_cache = result_list.append

            def _line_cache_and_writer(**kwargs):
                """Write formatted line and cache arguments"""
                end = per_line % kwargs
                add_to_cache(end)
                write(start + end)
            return _line_cache_and_writer

        def _line_writer(**kwargs):
            """Write formatted line from arguments given by underlying opts"""
            write(start + per_line % kwargs)
        return _line_writer

    def write_cached_lines(self, cache_id, revno):
        """Write cached results out again for new revision"""
        cached_path, cached_matches = self.cache[cache_id]
        start = self._format_initial % {"path": cached_path, "revno": revno}
        write = self.outf.write
        for end in cached_matches:
            write(start + end)

    def _get_writer_regexp_highlighted(self, path, revno, cache_id):
        """Get function for writing output with regexp match highlighted"""
        _line_writer = self._get_writer_plain(path, revno, cache_id)
        sub, highlight = self._sub, self._highlight

        def _line_writer_regexp_highlighted(line, **kwargs):
            """Write formatted line with matched pattern highlighted"""
            return _line_writer(line=sub(highlight, line), **kwargs)
        return _line_writer_regexp_highlighted

    def _get_writer_fixed_highlighted(self, path, revno, cache_id):
        """Get function for writing output with search string highlighted"""
        _line_writer = self._get_writer_plain(path, revno, cache_id)
        old, new = self._old, self._new

        def _line_writer_fixed_highlighted(line, **kwargs):
            """Write formatted line with string searched for highlighted"""
            return _line_writer(line=line.replace(old, new), **kwargs)
        return _line_writer_fixed_highlighted
677
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
678
# test and skip binary files
679
if b'\x00' in file_text[:1024]:
681
trace.warning("Binary file '%s' skipped.", path)
684
if path_prefix and path_prefix != '.':
685
# user has passed a dir arg, show that as result prefix
686
path = osutils.pathjoin(path_prefix, path)
688
# GZ 2010-06-07: There's no actual guarentee the file contents will be in
689
# the user encoding, but we have to guess something and it
690
# is a reasonable default without a better mechanism.
691
file_encoding = _user_encoding
692
pattern = opts.pattern.encode(_user_encoding, 'replace')
694
writeline = opts.outputter.get_writer(path, revno, cache_id)
696
if opts.files_with_matches or opts.files_without_match:
697
if opts.fixed_string:
698
found = pattern in file_text
700
search = opts.patternc.search
701
if b"$" not in pattern:
702
found = search(file_text) is not None
704
for line in file_text.splitlines():
710
if (opts.files_with_matches and found) or \
711
(opts.files_without_match and not found):
713
elif opts.fixed_string:
714
# Fast path for no match, search through the entire file at once rather
715
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
716
i = file_text.find(pattern)
719
b = file_text.rfind(b"\n", 0, i) + 1
721
start = file_text.count(b"\n", 0, b) + 1
722
file_text = file_text[b:]
724
for index, line in enumerate(file_text.splitlines()):
726
line = line.decode(file_encoding, 'replace')
727
writeline(lineno=index + start, line=line)
729
for line in file_text.splitlines():
731
line = line.decode(file_encoding, 'replace')
734
# Fast path on no match, the re module avoids bad behaviour in most
735
# standard cases, but perhaps could try and detect backtracking
736
# patterns here and avoid whole text search in those cases
737
search = opts.patternc.search
738
if b"$" not in pattern:
739
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
740
# through revisions as bazaar returns binary mode
741
# and trailing \r breaks $ as line ending match
742
m = search(file_text)
745
b = file_text.rfind(b"\n", 0, m.start()) + 1
747
start = file_text.count(b"\n", 0, b) + 1
748
file_text = file_text[b:]
752
for index, line in enumerate(file_text.splitlines()):
754
line = line.decode(file_encoding, 'replace')
755
writeline(lineno=index + start, line=line)
757
for line in file_text.splitlines():
759
line = line.decode(file_encoding, 'replace')