# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from __future__ import absolute_import
22
from ...lazy_import import lazy_import
23
lazy_import(globals(), """
24
from fnmatch import fnmatch
26
from breezy._termcolor import color_string, FG
33
revision as _mod_revision,
40
from breezy.revisionspec import (
45
from breezy.sixish import (
49
# Cache the user's encoding once at import time; it is used both to encode
# search patterns to bytes and to decode file content for display.
_user_encoding = osutils.get_user_encoding()
class _RevisionNotLinear(Exception):
53
"""Raised when a revision is not on left-hand history."""
56
def _rev_on_mainline(rev_tuple):
57
"""returns True is rev tuple is on mainline"""
58
if len(rev_tuple) == 1:
60
return rev_tuple[1] == 0 and rev_tuple[2] == 0
63
# NOTE: _linear_view_revisions is based on
# breezy.log._linear_view_revisions.
# This should probably be a common public API
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield ``(revision_id, revno_str, 0)`` walking left-hand history.

    Walks newest-to-oldest from ``end_rev_id`` and stops once
    ``start_rev_id`` is reached — requires that start is older than end.
    Merge depth is always 0 since only the mainline ancestry is walked.
    """
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        if revision_id == start_rev_id:
            yield revision_id, revno_str, 0
            break
        yield revision_id, revno_str, 0
# NOTE: _graph_view_revisions is copied from
81
# breezy.log._graph_view_revisions.
82
# This should probably be a common public API
83
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
84
rebase_initial_depths=True):
85
"""Calculate revisions to view including merges, newest to oldest.
87
:param branch: the branch
88
:param start_rev_id: the lower revision-id
89
:param end_rev_id: the upper revision-id
90
:param rebase_initial_depth: should depths be rebased until a mainline
92
:return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
94
# requires that start is older than end
95
view_revisions = branch.iter_merge_sorted_revisions(
96
start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
97
stop_rule="with-merges")
98
if not rebase_initial_depths:
99
for (rev_id, merge_depth, revno, end_of_merge
101
yield rev_id, '.'.join(map(str, revno)), merge_depth
103
# We're following a development line starting at a merged revision.
104
# We need to adjust depths down by the initial depth until we find
105
# a depth less than it. Then we use that depth as the adjustment.
106
# If and when we reach the mainline, depth adjustment ends.
107
depth_adjustment = None
108
for (rev_id, merge_depth, revno, end_of_merge
110
if depth_adjustment is None:
111
depth_adjustment = merge_depth
113
if merge_depth < depth_adjustment:
114
# From now on we reduce the depth adjustement, this can be
115
# surprising for users. The alternative requires two passes
116
# which breaks the fast display of the first revision
118
depth_adjustment = merge_depth
119
merge_depth -= depth_adjustment
120
yield rev_id, '.'.join(map(str, revno)), merge_depth
123
def compile_pattern(pattern, flags=0):
    """Compile ``pattern``, converting a bad regex into a BzrError.

    :param pattern: regular expression source text
    :param flags: ``re`` flags (e.g. ``re.IGNORECASE``)
    :return: the compiled pattern object
    :raises errors.BzrError: if the pattern is not a valid regex
    """
    try:
        # use python's re.compile as we need to catch re.error in case of
        # a bad pattern
        lazy_regex.reset_compile()
        return re.compile(pattern, flags)
    except re.error:
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
def is_fixed_string(s):
    """Return True if ``s`` contains only word characters and whitespace.

    Such a pattern has no regex metacharacters, so a plain substring
    search can be used instead of the ``re`` module.
    """
    return bool(re.match("^([A-Za-z0-9_]|\\s)*$", s))
class _GrepDiffOutputter(object):
141
"""Precalculate formatting based on options given for diff grep.
144
def __init__(self, opts):
146
self.outf = opts.outf
148
if opts.fixed_string:
149
self._old = opts.pattern
150
self._new = color_string(opts.pattern, FG.BOLD_RED)
151
self.get_writer = self._get_writer_fixed_highlighted
153
flags = opts.patternc.flags
154
self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
155
self._highlight = color_string("\\1", FG.BOLD_RED)
156
self.get_writer = self._get_writer_regexp_highlighted
158
self.get_writer = self._get_writer_plain
160
def get_file_header_writer(self):
161
"""Get function for writing file headers"""
162
write = self.outf.write
163
eol_marker = self.opts.eol_marker
164
def _line_writer(line):
165
write(line + eol_marker)
166
def _line_writer_color(line):
167
write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
168
if self.opts.show_color:
169
return _line_writer_color
174
def get_revision_header_writer(self):
175
"""Get function for writing revno lines"""
176
write = self.outf.write
177
eol_marker = self.opts.eol_marker
178
def _line_writer(line):
179
write(line + eol_marker)
180
def _line_writer_color(line):
181
write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
182
if self.opts.show_color:
183
return _line_writer_color
188
def _get_writer_plain(self):
189
"""Get function for writing uncoloured output"""
190
write = self.outf.write
191
eol_marker = self.opts.eol_marker
192
def _line_writer(line):
193
write(line + eol_marker)
196
def _get_writer_regexp_highlighted(self):
197
"""Get function for writing output with regexp match highlighted"""
198
_line_writer = self._get_writer_plain()
199
sub, highlight = self._sub, self._highlight
200
def _line_writer_regexp_highlighted(line):
201
"""Write formatted line with matched pattern highlighted"""
202
return _line_writer(line=sub(highlight, line))
203
return _line_writer_regexp_highlighted
205
def _get_writer_fixed_highlighted(self):
206
"""Get function for writing output with search string highlighted"""
207
_line_writer = self._get_writer_plain()
208
old, new = self._old, self._new
209
def _line_writer_fixed_highlighted(line):
210
"""Write formatted line with string searched for highlighted"""
211
return _line_writer(line=line.replace(old, new))
212
return _line_writer_fixed_highlighted
216
def grep_diff(opts):
    """Grep the changesets (diffs) of a revision range for ``opts.pattern``.

    Only added/removed diff lines are matched; output is grouped under
    ``=== revno ===`` and per-file headers.  NOTE(review): the original
    ``def`` header line was lost in extraction and has been reconstructed.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if opts.revision:
            start_rev = opts.revision[0]
        else:
            # if no revision is specified for diff grep we grep all changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
            start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            return
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                             _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if len(new_rev.parent_ids) == 0:
                ancestor_id = _mod_revision.NULL_REVISION
            else:
                ancestor_id = new_rev.parent_ids[0]
            old_tree = repo.revision_tree(ancestor_id)
            s = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, s,
                                 old_label='', new_label='')
            display_revno = True
            display_file = False
            file_header = None
            text = s.getvalue()
            for line in text.splitlines():
                if file_pattern.search(line):
                    file_header = line
                    display_file = True
                elif diff_pattern.search(line):
                    if display_revno:
                        # print revno only once per revision
                        writerevno("=== revno:%s ===" % (revno,))
                        display_revno = False
                    if display_file:
                        # print filename only once per file
                        writefileheader(
                            "  %s" % (file_header.decode(file_encoding,
                                                         'replace'),))
                        display_file = False
                    line = line.decode(file_encoding, 'replace')
                    writeline("    %s" % (line,))
def versioned_grep(opts):
    """Grep versioned file texts over the revision range in ``opts``."""
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                             _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
def workingtree_grep(opts):
    """Grep the files of the current working tree for ``opts.pattern``.

    :raises errors.BzrCommandError: if no working tree is present.
    """
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
def _skip_file(include, exclude, path):
    """Return True if ``path`` should be skipped per include/exclude globs."""
    if include and not _path_in_glob_list(path, include):
        return True
    if exclude and _path_in_glob_list(path, exclude):
        return True
    return False
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Recursively grep the files under ``path`` in ``tree``.

    ``revno`` is None when grepping the working tree; otherwise versioned
    file texts are fetched from the repository in bulk via
    ``iter_files_bytes``.
    """
    # setup relpath to open files relative to cwd
    # NOTE(review): rpath (and recursive below) appear unused — kept from
    # the original code; confirm before removing.
    rpath = relpath
    if relpath:
        rpath = osutils.pathjoin('..', relpath)

    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching recursively from root
        from_dir = None
        recursive = True

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, entry in tree.list_files(
            include_root=False, from_dir=from_dir,
            recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(tree_path, fid)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((tree_path, (fp, tree_path)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read
                    # the entire file
                    with open(path_for_file, 'rb', buffering=4096) as file:
                        _file_grep_list_only_wtree(file, fp, opts,
                                                   path_prefix)
                else:
                    with open(path_for_file, 'rb') as f:
                        _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            _file_grep(b''.join(chunks), path, opts, revno, path_prefix,
                       tree.get_file_revision(tree_path))
def _make_display_path(relpath, path):
    """Return path string relative to user cwd.

    Take tree's 'relpath' and user supplied 'path', and return path
    that can be displayed to the user.
    """
    if relpath:
        # update path so to display it w.r.t cwd
        # handle windows slash separator
        path = osutils.normpath(osutils.pathjoin(relpath, path))
        path = path.replace('\\', '/')
        path = path.replace(relpath + '/', '', 1)
    return path
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno,
                        path_prefix=None):
    """Fetch the text for ``tree_path`` and pass it on to ``_file_grep``.

    ``path`` is first rewritten relative to the user's cwd for display.
    """
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(tree_path)
    _file_grep(file_text, path, opts, revno, path_prefix)
def _path_in_glob_list(path, glob_list):
486
for glob in glob_list:
487
if fnmatch(path, glob):
492
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
493
# test and skip binary files
494
if b'\x00' in file.read(1024):
496
trace.warning("Binary file '%s' skipped.", path)
499
file.seek(0) # search from beginning
502
if opts.fixed_string:
503
pattern = opts.pattern.encode(_user_encoding, 'replace')
508
else: # not fixed_string
510
if opts.patternc.search(line):
514
if (opts.files_with_matches and found) or \
515
(opts.files_without_match and not found):
516
if path_prefix and path_prefix != '.':
517
# user has passed a dir arg, show that as result prefix
518
path = osutils.pathjoin(path_prefix, path)
519
opts.outputter.get_writer(path, None, None)()
522
class _Outputter(object):
523
"""Precalculate formatting based on options given
525
The idea here is to do this work only once per run, and finally return a
526
function that will do the minimum amount possible for each match.
528
def __init__(self, opts, use_cache=False):
529
self.outf = opts.outf
531
# self.cache is used to cache results for dir grep based on fid.
532
# If the fid is does not change between results, it means that
533
# the result will be the same apart from revno. In such a case
534
# we avoid getting file chunks from repo and grepping. The result
535
# is just printed by replacing old revno with new one.
539
no_line = opts.files_with_matches or opts.files_without_match
543
self.get_writer = self._get_writer_plain
544
elif opts.fixed_string:
545
self._old = opts.pattern
546
self._new = color_string(opts.pattern, FG.BOLD_RED)
547
self.get_writer = self._get_writer_fixed_highlighted
549
flags = opts.patternc.flags
550
self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
551
self._highlight = color_string("\\1", FG.BOLD_RED)
552
self.get_writer = self._get_writer_regexp_highlighted
553
path_start = FG.MAGENTA
555
sep = color_string(':', FG.BOLD_CYAN)
556
rev_sep = color_string('~', FG.BOLD_YELLOW)
558
self.get_writer = self._get_writer_plain
559
path_start = path_end = ""
563
parts = [path_start, "%(path)s"]
565
parts.extend([rev_sep, "%(revno)s"])
566
self._format_initial = "".join(parts)
569
if not opts.print_revno:
570
parts.append(path_end)
573
parts.extend([sep, "%(lineno)s"])
574
parts.extend([sep, "%(line)s"])
575
parts.append(opts.eol_marker)
576
self._format_perline = "".join(parts)
578
def _get_writer_plain(self, path, revno, cache_id):
579
"""Get function for writing uncoloured output"""
580
per_line = self._format_perline
581
start = self._format_initial % {"path":path, "revno":revno}
582
write = self.outf.write
583
if self.cache is not None and cache_id is not None:
585
self.cache[cache_id] = path, result_list
586
add_to_cache = result_list.append
587
def _line_cache_and_writer(**kwargs):
588
"""Write formatted line and cache arguments"""
589
end = per_line % kwargs
592
return _line_cache_and_writer
593
def _line_writer(**kwargs):
594
"""Write formatted line from arguments given by underlying opts"""
595
write(start + per_line % kwargs)
598
def write_cached_lines(self, cache_id, revno):
599
"""Write cached results out again for new revision"""
600
cached_path, cached_matches = self.cache[cache_id]
601
start = self._format_initial % {"path":cached_path, "revno":revno}
602
write = self.outf.write
603
for end in cached_matches:
606
def _get_writer_regexp_highlighted(self, path, revno, cache_id):
607
"""Get function for writing output with regexp match highlighted"""
608
_line_writer = self._get_writer_plain(path, revno, cache_id)
609
sub, highlight = self._sub, self._highlight
610
def _line_writer_regexp_highlighted(line, **kwargs):
611
"""Write formatted line with matched pattern highlighted"""
612
return _line_writer(line=sub(highlight, line), **kwargs)
613
return _line_writer_regexp_highlighted
615
def _get_writer_fixed_highlighted(self, path, revno, cache_id):
616
"""Get function for writing output with search string highlighted"""
617
_line_writer = self._get_writer_plain(path, revno, cache_id)
618
old, new = self._old, self._new
619
def _line_writer_fixed_highlighted(line, **kwargs):
620
"""Write formatted line with string searched for highlighted"""
621
return _line_writer(line=line.replace(old, new), **kwargs)
622
return _line_writer_fixed_highlighted
625
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
626
# test and skip binary files
627
if b'\x00' in file_text[:1024]:
629
trace.warning("Binary file '%s' skipped.", path)
632
if path_prefix and path_prefix != '.':
633
# user has passed a dir arg, show that as result prefix
634
path = osutils.pathjoin(path_prefix, path)
636
# GZ 2010-06-07: There's no actual guarentee the file contents will be in
637
# the user encoding, but we have to guess something and it
638
# is a reasonable default without a better mechanism.
639
file_encoding = _user_encoding
640
pattern = opts.pattern.encode(_user_encoding, 'replace')
642
writeline = opts.outputter.get_writer(path, revno, cache_id)
644
if opts.files_with_matches or opts.files_without_match:
645
if opts.fixed_string:
646
found = pattern in file_text
648
search = opts.patternc.search
649
if b"$" not in pattern:
650
found = search(file_text) is not None
652
for line in file_text.splitlines():
658
if (opts.files_with_matches and found) or \
659
(opts.files_without_match and not found):
661
elif opts.fixed_string:
662
# Fast path for no match, search through the entire file at once rather
663
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
664
i = file_text.find(pattern)
667
b = file_text.rfind(b"\n", 0, i) + 1
669
start = file_text.count(b"\n", 0, b) + 1
670
file_text = file_text[b:]
672
for index, line in enumerate(file_text.splitlines()):
674
line = line.decode(file_encoding, 'replace')
675
writeline(lineno=index+start, line=line)
677
for line in file_text.splitlines():
679
line = line.decode(file_encoding, 'replace')
682
# Fast path on no match, the re module avoids bad behaviour in most
683
# standard cases, but perhaps could try and detect backtracking
684
# patterns here and avoid whole text search in those cases
685
search = opts.patternc.search
686
if b"$" not in pattern:
687
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
688
# through revisions as bazaar returns binary mode
689
# and trailing \r breaks $ as line ending match
690
m = search(file_text)
693
b = file_text.rfind(b"\n", 0, m.start()) + 1
695
start = file_text.count(b"\n", 0, b) + 1
696
file_text = file_text[b:]
700
for index, line in enumerate(file_text.splitlines()):
702
line = line.decode(file_encoding, 'replace')
703
writeline(lineno=index+start, line=line)
705
for line in file_text.splitlines():
707
line = line.decode(file_encoding, 'replace')