13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from __future__ import absolute_import
21
from bzrlib.lazy_import import lazy_import
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from io import BytesIO
20
from .lazy_import import lazy_import
22
21
lazy_import(globals(), """
23
22
from fnmatch import fnmatch
25
from cStringIO import StringIO
27
from bzrlib._termcolor import color_string, re_color_string, FG
29
from bzrlib.revisionspec import (
24
from breezy._termcolor import color_string, FG
40
34
revision as _mod_revision,
37
from .revisionspec import (
45
43
_user_encoding = osutils.get_user_encoding()
49
47
"""Raised when a revision is not on left-hand history."""
50
class GrepOptions(object):
51
"""Container to pass around grep options.
53
This class is used as a container to pass around user option and
54
some other params (like outf) to processing functions. This makes
55
it easier to add more options as grep evolves.
70
files_with_matches = False
71
files_without_match = False
52
86
def _rev_on_mainline(rev_tuple):
53
87
"""returns True is rev tuple is on mainline"""
54
88
if len(rev_tuple) == 1:
59
93
# NOTE: _linear_view_revisions is based on
60
# bzrlib.log._linear_view_revisions.
94
# breezy.log._linear_view_revisions.
61
95
# This should probably be a common public API
62
96
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
63
97
# requires that start is older than end
76
110
# NOTE: _graph_view_revisions is copied from
77
# bzrlib.log._graph_view_revisions.
111
# breezy.log._graph_view_revisions.
78
112
# This should probably be a common public API
79
113
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
80
114
rebase_initial_depths=True):
119
153
def compile_pattern(pattern, flags=0):
122
# use python's re.compile as we need to catch re.error in case of bad pattern
123
lazy_regex.reset_compile()
124
patternc = re.compile(pattern, flags)
155
return re.compile(pattern, flags)
156
except re.error as e:
126
157
raise errors.BzrError("Invalid pattern: '%s'" % pattern)
130
161
def is_fixed_string(s):
131
if re.match("^([A-Za-z0-9_]|\s)*$", s):
162
if re.match("^([A-Za-z0-9_]|\\s)*$", s):
142
173
self.outf = opts.outf
143
174
if opts.show_color:
144
pat = opts.pattern.encode(_user_encoding, 'replace')
145
175
if opts.fixed_string:
147
self._new = color_string(pat, FG.BOLD_RED)
176
self._old = opts.pattern
177
self._new = color_string(opts.pattern, FG.BOLD_RED)
148
178
self.get_writer = self._get_writer_fixed_highlighted
150
180
flags = opts.patternc.flags
151
self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
181
self._sub = re.compile(
182
opts.pattern.join(("((?:", ")+)")), flags).sub
152
183
self._highlight = color_string("\\1", FG.BOLD_RED)
153
184
self.get_writer = self._get_writer_regexp_highlighted
194
230
"""Get function for writing output with regexp match highlighted"""
195
231
_line_writer = self._get_writer_plain()
196
232
sub, highlight = self._sub, self._highlight
197
234
def _line_writer_regexp_highlighted(line):
198
235
"""Write formatted line with matched pattern highlighted"""
199
236
return _line_writer(line=sub(highlight, line))
203
240
"""Get function for writing output with search string highlighted"""
204
241
_line_writer = self._get_writer_plain()
205
242
old, new = self._old, self._new
206
244
def _line_writer_fixed_highlighted(line):
207
245
"""Write formatted line with string searched for highlighted"""
208
246
return _line_writer(line=line.replace(old, new))
212
250
def grep_diff(opts):
213
251
wt, branch, relpath = \
214
bzrdir.BzrDir.open_containing_tree_or_branch('.')
252
controldir.ControlDir.open_containing_tree_or_branch('.')
253
with branch.lock_read():
217
254
if opts.revision:
218
255
start_rev = opts.revision[0]
220
257
# if no revision is specified for diff grep we grep all changesets.
221
258
opts.revision = [RevisionSpec.from_string('revno:1'),
222
RevisionSpec.from_string('last:1')]
259
RevisionSpec.from_string('last:1')]
223
260
start_rev = opts.revision[0]
224
261
start_revid = start_rev.as_revision_id(branch)
225
if start_revid == 'null:':
262
if start_revid == b'null:':
227
264
srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
228
265
if len(opts.revision) == 2:
232
269
end_revno, end_revid = branch.last_revision_info()
233
270
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
235
grep_mainline = (_rev_on_mainline(srevno_tuple) and
236
_rev_on_mainline(erevno_tuple))
272
grep_mainline = (_rev_on_mainline(srevno_tuple)
273
and _rev_on_mainline(erevno_tuple))
238
275
# ensure that we go in reverse order
239
276
if srevno_tuple > erevno_tuple:
244
281
# faster when we don't want to look at merged revs. We try this
245
282
# with _linear_view_revisions. If all revs are to be grepped we
246
283
# use the slower _graph_view_revisions
247
if opts.levels==1 and grep_mainline:
248
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
284
if opts.levels == 1 and grep_mainline:
285
given_revs = _linear_view_revisions(
286
branch, start_revid, end_revid)
250
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
288
given_revs = _graph_view_revisions(
289
branch, start_revid, end_revid)
252
291
# We do an optimization below. For grepping a specific revision
253
292
# We don't need to call _graph_view_revisions which is slow.
257
296
start_rev_tuple = (start_revid, start_revno, 0)
258
297
given_revs = [start_rev_tuple]
259
298
repo = branch.repository
260
diff_pattern = re.compile("^[+\-].*(" + opts.pattern + ")")
261
file_pattern = re.compile("=== (modified|added|removed) file '.*'", re.UNICODE)
299
diff_pattern = re.compile(
300
b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
301
file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
262
302
outputter = _GrepDiffOutputter(opts)
263
303
writeline = outputter.get_writer()
264
304
writerevno = outputter.get_revision_header_writer()
269
309
# with level=1 show only top level
272
rev_spec = RevisionSpec_revid.from_string("revid:"+revid)
312
rev_spec = RevisionSpec_revid.from_string(
313
"revid:" + revid.decode('utf-8'))
273
314
new_rev = repo.get_revision(revid)
274
315
new_tree = rev_spec.as_tree(branch)
275
316
if len(new_rev.parent_ids) == 0:
278
319
ancestor_id = new_rev.parent_ids[0]
279
320
old_tree = repo.revision_tree(ancestor_id)
281
322
diff.show_diff_trees(old_tree, new_tree, s,
282
old_label='', new_label='')
323
old_label='', new_label='')
283
324
display_revno = True
284
325
display_file = False
285
326
file_header = None
293
334
writerevno("=== revno:%s ===" % (revno,))
294
335
display_revno = False
296
writefileheader(" %s" % (file_header,))
338
" %s" % (file_header.decode(file_encoding, 'replace'),))
297
339
display_file = False
298
340
line = line.decode(file_encoding, 'replace')
299
341
writeline(" %s" % (line,))
304
344
def versioned_grep(opts):
305
345
wt, branch, relpath = \
306
bzrdir.BzrDir.open_containing_tree_or_branch('.')
346
controldir.ControlDir.open_containing_tree_or_branch('.')
347
with branch.lock_read():
309
348
start_rev = opts.revision[0]
310
349
start_revid = start_rev.as_revision_id(branch)
311
350
if start_revid is None:
320
359
end_revno, end_revid = branch.last_revision_info()
321
360
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
323
grep_mainline = (_rev_on_mainline(srevno_tuple) and
324
_rev_on_mainline(erevno_tuple))
362
grep_mainline = (_rev_on_mainline(srevno_tuple)
363
and _rev_on_mainline(erevno_tuple))
326
365
# ensure that we go in reverse order
327
366
if srevno_tuple > erevno_tuple:
333
372
# with _linear_view_revisions. If all revs are to be grepped we
334
373
# use the slower _graph_view_revisions
335
374
if opts.levels == 1 and grep_mainline:
336
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
375
given_revs = _linear_view_revisions(
376
branch, start_revid, end_revid)
338
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
378
given_revs = _graph_view_revisions(
379
branch, start_revid, end_revid)
340
381
# We do an optimization below. For grepping a specific revision
341
382
# We don't need to call _graph_view_revisions which is slow.
353
394
# with level=1 show only top level
356
rev = RevisionSpec_revid.from_string("revid:"+revid)
397
rev = RevisionSpec_revid.from_string(
398
"revid:" + revid.decode('utf-8'))
357
399
tree = rev.as_tree(branch)
358
400
for path in opts.path_list:
359
path_for_id = osutils.pathjoin(relpath, path)
360
id = tree.path2id(path_for_id)
362
trace.warning("Skipped unknown file '%s'." % path)
401
tree_path = osutils.pathjoin(relpath, path)
402
if not tree.has_filename(tree_path):
403
trace.warning("Skipped unknown file '%s'.", path)
365
406
if osutils.isdir(path):
366
407
path_prefix = path
367
408
dir_grep(tree, path, relpath, opts, revno, path_prefix)
369
versioned_file_grep(tree, id, '.', path, opts, revno)
411
tree, tree_path, '.', path, opts, revno)
374
414
def workingtree_grep(opts):
375
revno = opts.print_revno = None # for working tree set revno to None
415
revno = opts.print_revno = None # for working tree set revno to None
377
417
tree, branch, relpath = \
378
bzrdir.BzrDir.open_containing_tree_or_branch('.')
418
controldir.ControlDir.open_containing_tree_or_branch('.')
380
420
msg = ('Cannot search working tree. Working tree not found.\n'
381
'To search for specific revision in history use the -r option.')
382
raise errors.BzrCommandError(msg)
421
'To search for specific revision in history use the -r option.')
422
raise errors.CommandError(msg)
384
424
# GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
385
425
opts.outputter = _Outputter(opts)
427
with tree.lock_read():
389
428
for path in opts.path_list:
390
429
if osutils.isdir(path):
391
430
path_prefix = path
392
431
dir_grep(tree, path, relpath, opts, revno, path_prefix)
394
_file_grep(open(path).read(), path, opts, revno)
433
with open(path, 'rb') as f:
434
_file_grep(f.read(), path, opts, revno)
399
437
def _skip_file(include, exclude, path):
408
446
# setup relpath to open files relative to cwd
411
rpath = osutils.pathjoin('..',relpath)
449
rpath = osutils.pathjoin('..', relpath)
413
451
from_dir = osutils.pathjoin(relpath, path)
414
452
if opts.from_root:
415
453
# start searching recursively from root
420
458
to_grep_append = to_grep.append
422
460
# and hits manually refilled. Could do this again if it was
423
461
# for a good reason, otherwise cache might want purging.
424
462
outputter = opts.outputter
425
for fp, fc, fkind, fid, entry in tree.list_files(include_root=False,
426
from_dir=from_dir, recursive=opts.recursive):
463
for fp, fc, fkind, entry in tree.list_files(
464
include_root=False, from_dir=from_dir, recursive=opts.recursive):
428
466
if _skip_file(opts.include, opts.exclude, fp):
431
469
if fc == 'V' and fkind == 'file':
470
tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
471
if revno is not None:
433
472
# If old result is valid, print results immediately.
434
473
# Otherwise, add file info to to_grep so that the
435
474
# loop later will get chunks and grep them
436
cache_id = tree.get_file_revision(fid)
475
cache_id = tree.get_file_revision(tree_path)
437
476
if cache_id in outputter.cache:
438
477
# GZ 2010-06-05: Not really sure caching and re-outputting
439
478
# the old path is really the right thing,
440
479
# but it's what the old code seemed to do
441
480
outputter.write_cached_lines(cache_id, revno)
443
to_grep_append((fid, (fp, fid)))
482
to_grep_append((tree_path, (fp, tree_path)))
445
484
# we are grepping working tree.
446
485
if from_dir is None:
450
489
if opts.files_with_matches or opts.files_without_match:
451
490
# Optimize for wtree list-only as we don't need to read the
453
file = open(path_for_file, 'r', buffering=4096)
454
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
492
with open(path_for_file, 'rb', buffering=4096) as file:
493
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
456
file_text = open(path_for_file, 'r').read()
457
_file_grep(file_text, fp, opts, revno, path_prefix)
495
with open(path_for_file, 'rb') as f:
496
_file_grep(f.read(), fp, opts, revno, path_prefix)
459
if revno != None: # grep versioned files
460
for (path, fid), chunks in tree.iter_files_bytes(to_grep):
498
if revno is not None: # grep versioned files
499
for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
461
500
path = _make_display_path(relpath, path)
462
_file_grep(chunks[0], path, opts, revno, path_prefix,
463
tree.get_file_revision(fid, path))
501
_file_grep(b''.join(chunks), path, opts, revno, path_prefix,
502
tree.get_file_revision(tree_path))
466
505
def _make_display_path(relpath, path):
481
def versioned_file_grep(tree, id, relpath, path, opts, revno, path_prefix = None):
520
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
482
521
"""Create a file object for the specified id and pass it on to _file_grep.
485
524
path = _make_display_path(relpath, path)
486
file_text = tree.get_file_text(id)
525
file_text = tree.get_file_text(tree_path)
487
526
_file_grep(file_text, path, opts, revno, path_prefix)
497
536
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
498
537
# test and skip binary files
499
if '\x00' in file.read(1024):
538
if b'\x00' in file.read(1024):
501
trace.warning("Binary file '%s' skipped." % path)
540
trace.warning("Binary file '%s' skipped.", path)
504
file.seek(0) # search from beginning
543
file.seek(0) # search from beginning
507
546
if opts.fixed_string:
510
549
if pattern in line:
513
else: # not fixed_string
552
else: # not fixed_string
514
553
for line in file:
515
554
if opts.patternc.search(line):
519
558
if (opts.files_with_matches and found) or \
520
(opts.files_without_match and not found):
559
(opts.files_without_match and not found):
521
560
if path_prefix and path_prefix != '.':
522
561
# user has passed a dir arg, show that as result prefix
523
562
path = osutils.pathjoin(path_prefix, path)
544
584
no_line = opts.files_with_matches or opts.files_without_match
546
586
if opts.show_color:
547
pat = opts.pattern.encode(_user_encoding, 'replace')
549
588
self.get_writer = self._get_writer_plain
550
589
elif opts.fixed_string:
552
self._new = color_string(pat, FG.BOLD_RED)
590
self._old = opts.pattern
591
self._new = color_string(opts.pattern, FG.BOLD_RED)
553
592
self.get_writer = self._get_writer_fixed_highlighted
555
594
flags = opts.patternc.flags
556
self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
595
self._sub = re.compile(
596
opts.pattern.join(("((?:", ")+)")), flags).sub
557
597
self._highlight = color_string("\\1", FG.BOLD_RED)
558
598
self.get_writer = self._get_writer_regexp_highlighted
559
599
path_start = FG.MAGENTA
584
624
def _get_writer_plain(self, path, revno, cache_id):
585
625
"""Get function for writing uncoloured output"""
586
626
per_line = self._format_perline
587
start = self._format_initial % {"path":path, "revno":revno}
627
start = self._format_initial % {"path": path, "revno": revno}
588
628
write = self.outf.write
589
629
if self.cache is not None and cache_id is not None:
591
631
self.cache[cache_id] = path, result_list
592
632
add_to_cache = result_list.append
593
634
def _line_cache_and_writer(**kwargs):
594
635
"""Write formatted line and cache arguments"""
595
636
end = per_line % kwargs
596
637
add_to_cache(end)
597
638
write(start + end)
598
639
return _line_cache_and_writer
599
641
def _line_writer(**kwargs):
600
642
"""Write formatted line from arguments given by underlying opts"""
601
643
write(start + per_line % kwargs)
604
646
def write_cached_lines(self, cache_id, revno):
605
647
"""Write cached results out again for new revision"""
606
648
cached_path, cached_matches = self.cache[cache_id]
607
start = self._format_initial % {"path":cached_path, "revno":revno}
649
start = self._format_initial % {"path": cached_path, "revno": revno}
608
650
write = self.outf.write
609
651
for end in cached_matches:
610
652
write(start + end)
613
655
"""Get function for writing output with regexp match highlighted"""
614
656
_line_writer = self._get_writer_plain(path, revno, cache_id)
615
657
sub, highlight = self._sub, self._highlight
616
659
def _line_writer_regexp_highlighted(line, **kwargs):
617
660
"""Write formatted line with matched pattern highlighted"""
618
661
return _line_writer(line=sub(highlight, line), **kwargs)
622
665
"""Get function for writing output with search string highlighted"""
623
666
_line_writer = self._get_writer_plain(path, revno, cache_id)
624
667
old, new = self._old, self._new
625
669
def _line_writer_fixed_highlighted(line, **kwargs):
626
670
"""Write formatted line with string searched for highlighted"""
627
671
return _line_writer(line=line.replace(old, new), **kwargs)
631
675
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
632
676
# test and skip binary files
633
if '\x00' in file_text[:1024]:
677
if b'\x00' in file_text[:1024]:
635
trace.warning("Binary file '%s' skipped." % path)
679
trace.warning("Binary file '%s' skipped.", path)
638
682
if path_prefix and path_prefix != '.':
650
694
if opts.files_with_matches or opts.files_without_match:
651
695
if opts.fixed_string:
652
if sys.platform > (2, 5):
653
found = pattern in file_text
655
for line in file_text.splitlines():
696
found = pattern in file_text
662
698
search = opts.patternc.search
663
if "$" not in pattern:
699
if b"$" not in pattern:
664
700
found = search(file_text) is not None
666
702
for line in file_text.splitlines():
675
711
elif opts.fixed_string:
676
712
# Fast path for no match, search through the entire file at once rather
677
# than a line at a time. However, we don't want this without Python 2.5
678
# as the quick string search algorithm wasn't implemented till then:
679
# <http://effbot.org/zone/stringlib.htm>
680
if sys.version_info > (2, 5):
681
i = file_text.find(pattern)
684
b = file_text.rfind("\n", 0, i) + 1
686
start = file_text.count("\n", 0, b) + 1
687
file_text = file_text[b:]
713
# than a line at a time. <http://effbot.org/zone/stringlib.htm>
714
i = file_text.find(pattern)
717
b = file_text.rfind(b"\n", 0, i) + 1
719
start = file_text.count(b"\n", 0, b) + 1
720
file_text = file_text[b:]
690
721
if opts.line_number:
691
722
for index, line in enumerate(file_text.splitlines()):
692
723
if pattern in line:
693
724
line = line.decode(file_encoding, 'replace')
694
writeline(lineno=index+start, line=line)
725
writeline(lineno=index + start, line=line)
696
727
for line in file_text.splitlines():
697
728
if pattern in line:
702
733
# standard cases, but perhaps could try and detect backtracking
703
734
# patterns here and avoid whole text search in those cases
704
735
search = opts.patternc.search
705
if "$" not in pattern:
736
if b"$" not in pattern:
706
737
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
707
738
# through revisions as bazaar returns binary mode
708
739
# and trailing \r breaks $ as line ending match
709
740
m = search(file_text)
712
b = file_text.rfind("\n", 0, m.start()) + 1
743
b = file_text.rfind(b"\n", 0, m.start()) + 1
713
744
if opts.line_number:
714
start = file_text.count("\n", 0, b) + 1
745
start = file_text.count(b"\n", 0, b) + 1
715
746
file_text = file_text[b:]
719
750
for index, line in enumerate(file_text.splitlines()):
721
752
line = line.decode(file_encoding, 'replace')
722
writeline(lineno=index+start, line=line)
753
writeline(lineno=index + start, line=line)
724
755
for line in file_text.splitlines():
726
757
line = line.decode(file_encoding, 'replace')
727
758
writeline(line=line)