# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from io import BytesIO
from .lazy_import import lazy_import
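# lazy_import defers the imports in the quoted block below until they are
# first used, keeping start-up of the grep command cheap when they are not needed.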
lazy_import(globals(), """
from fnmatch import fnmatch

from breezy._termcolor import color_string, FG

from breezy import (
    revision as _mod_revision,
from .revisionspec import (
    RevisionSpec_revid,
    RevisionSpec_revno,
_user_encoding = osutils.get_user_encoding()
    """Raised when a revision is not on left-hand history."""
class GrepOptions(object):
    """Container to pass around grep options.

    This class is used as a container to pass around user options and
    some other params (like outf) to processing functions. This makes
    it easier to add more options as grep evolves.
    files_with_matches = False
    files_without_match = False
def _rev_on_mainline(rev_tuple):
    """Returns True if rev tuple is on mainline"""
if len(rev_tuple) == 1:
def compile_pattern(pattern, flags=0):
    try:
        return re.compile(pattern, flags)
    except re.error as e:
raise errors.BzrError("Invalid pattern: '%s'" % pattern)
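# Patterns made up only of word characters and whitespace (see is_fixed_string
# below) are treated as plain substrings and searched with 'in' rather than a
# compiled regexp, which is typically faster.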
def is_fixed_string(s):
    if re.match("^([A-Za-z0-9_]|\\s)*$", s):
        self.outf = opts.outf
        if opts.show_color:
if opts.fixed_string:
                self._old = opts.pattern
                self._new = color_string(opts.pattern, FG.BOLD_RED)
                self.get_writer = self._get_writer_fixed_highlighted
            else:
flags = opts.patternc.flags
                self._sub = re.compile(
                    opts.pattern.join(("((?:", ")+)")), flags).sub
self._highlight = color_string("\\1", FG.BOLD_RED)
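                # self._sub wraps the pattern as ((?:pat)+) so that runs of
                # adjacent matches are re-emitted as a single highlighted
                # group by the \1 backreference in self._highlight.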
self.get_writer = self._get_writer_regexp_highlighted
        """Get function for writing output with regexp match highlighted"""
        _line_writer = self._get_writer_plain()
        sub, highlight = self._sub, self._highlight

        def _line_writer_regexp_highlighted(line):
            """Write formatted line with matched pattern highlighted"""
            return _line_writer(line=sub(highlight, line))
        """Get function for writing output with search string highlighted"""
        _line_writer = self._get_writer_plain()
        old, new = self._old, self._new

        def _line_writer_fixed_highlighted(line):
            """Write formatted line with string searched for highlighted"""
            return _line_writer(line=line.replace(old, new))
        # if no revision is specified for diff grep we grep all changesets.
        opts.revision = [RevisionSpec.from_string('revno:1'),
                         RevisionSpec.from_string('last:1')]
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))
            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        # We do an optimization below. For grepping a specific revision
        # we don't need to call _graph_view_revisions which is slow.
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
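        # diff_pattern matches added or removed diff lines that contain the
        # user's pattern; file_pattern matches the per-file headers in the
        # diff output so matches can be grouped under the file they belong to.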
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
            # with level=1 show only top level
                rev_spec = RevisionSpec_revid.from_string(
                    "revid:" + revid.decode('utf-8'))
                new_rev = repo.get_revision(revid)
                new_tree = rev_spec.as_tree(branch)
if len(new_rev.parent_ids) == 0:
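                    # no parents: this is the initial commit, so it is diffed
                    # against the empty tree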
old_tree = repo.revision_tree(ancestor_id)
                diff.show_diff_trees(old_tree, new_tree, s,
                                     old_label='', new_label='')
                display_revno = True
                display_file = False
                file_header = None
                            writerevno("=== revno:%s ===" % (revno,))
                            display_revno = False
                            writefileheader(
                                " %s" % (file_header.decode(file_encoding, 'replace'),))
                            display_file = False
                        line = line.decode(file_encoding, 'replace')
                        writeline(" %s" % (line,))
            end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
            grep_mainline = (_rev_on_mainline(srevno_tuple)
                             and _rev_on_mainline(erevno_tuple))
            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        # We do an optimization below. For grepping a specific revision
        # we don't need to call _graph_view_revisions which is slow.
# with level=1 show only top level
            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
def workingtree_grep(opts):
    revno = opts.print_revno = None  # for working tree set revno to None
    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)
    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
            path_prefix = path
            dir_grep(tree, path, relpath, opts, revno, path_prefix)
        else:
            with open(path, 'rb') as f:
                _file_grep(f.read(), path, opts, revno)
def _skip_file(include, exclude, path):
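    # A path is skipped when it does not match any --include glob or when it
    # matches an --exclude glob.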
    # and hits manually refilled. Could do this again if it was
    # for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, entry in tree.list_files(
            include_root=False, from_dir=from_dir, recursive=opts.recursive):
        if _skip_file(opts.include, opts.exclude, fp):
        if fc == 'V' and fkind == 'file':
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(tree_path)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((tree_path, (fp, tree_path)))
            # we are grepping the working tree.
            if from_dir is None:
            if opts.files_with_matches or opts.files_without_match:
                # Optimize for wtree list-only as we don't need to read the
                with open(path_for_file, 'rb', buffering=4096) as file:
                    _file_grep_list_only_wtree(file, fp, opts, path_prefix)
            else:
                with open(path_for_file, 'rb') as f:
                    _file_grep(f.read(), fp, opts, revno, path_prefix)
    if revno is not None:  # grep versioned files
        for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
path = _make_display_path(relpath, path)
            _file_grep(b''.join(chunks), path, opts, revno, path_prefix,
                       tree.get_file_revision(tree_path))
def _make_display_path(relpath, path):
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
    """Create a file object for the specified tree path and pass it on to _file_grep.
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(tree_path)
    _file_grep(file_text, path, opts, revno, path_prefix)
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
    # test and skip binary files
    if b'\x00' in file.read(1024):
        trace.warning("Binary file '%s' skipped.", path)
    file.seek(0)  # search from beginning
    if opts.fixed_string:
            if pattern in line:
    else:  # not fixed_string
        for line in file:
            if opts.patternc.search(line):
    if (opts.files_with_matches and found) or \
            (opts.files_without_match and not found):
        if path_prefix and path_prefix != '.':
            # user has passed a dir arg, show that as result prefix
            path = osutils.pathjoin(path_prefix, path)
    The idea here is to do this work only once per run, and finally return a
    function that will do the minimum amount possible for each match.
    def __init__(self, opts, use_cache=False):
        self.outf = opts.outf
no_line = opts.files_with_matches or opts.files_without_match
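        # list-only modes (files_with_matches / files_without_match) print
        # file names only, so no per-line output is produced for them.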
if opts.show_color:
            if no_line:
self.get_writer = self._get_writer_plain
elif opts.fixed_string:
                self._old = opts.pattern
                self._new = color_string(opts.pattern, FG.BOLD_RED)
                self.get_writer = self._get_writer_fixed_highlighted
            else:
flags = opts.patternc.flags
                self._sub = re.compile(
                    opts.pattern.join(("((?:", ")+)")), flags).sub
self._highlight = color_string("\\1", FG.BOLD_RED)
                self.get_writer = self._get_writer_regexp_highlighted
            path_start = FG.MAGENTA
    def _get_writer_plain(self, path, revno, cache_id):
        """Get function for writing uncoloured output"""
        per_line = self._format_perline
        start = self._format_initial % {"path": path, "revno": revno}
write = self.outf.write
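        # When a cache is in use, each formatted line is also recorded so that
        # write_cached_lines() can replay the result cheaply for a later
        # revision of the same file text.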
        if self.cache is not None and cache_id is not None:
            self.cache[cache_id] = path, result_list
            add_to_cache = result_list.append
            def _line_cache_and_writer(**kwargs):
                """Write formatted line and cache arguments"""
                end = per_line % kwargs
                add_to_cache(end)
                write(start + end)
            return _line_cache_and_writer
        def _line_writer(**kwargs):
            """Write formatted line from arguments given by underlying opts"""
            write(start + per_line % kwargs)
    def write_cached_lines(self, cache_id, revno):
        """Write cached results out again for new revision"""
        cached_path, cached_matches = self.cache[cache_id]
        start = self._format_initial % {"path": cached_path, "revno": revno}
        write = self.outf.write
        for end in cached_matches:
            write(start + end)
        """Get function for writing output with regexp match highlighted"""
        _line_writer = self._get_writer_plain(path, revno, cache_id)
        sub, highlight = self._sub, self._highlight
        def _line_writer_regexp_highlighted(line, **kwargs):
            """Write formatted line with matched pattern highlighted"""
            return _line_writer(line=sub(highlight, line), **kwargs)
        """Get function for writing output with search string highlighted"""
        _line_writer = self._get_writer_plain(path, revno, cache_id)
        old, new = self._old, self._new
        def _line_writer_fixed_highlighted(line, **kwargs):
            """Write formatted line with string searched for highlighted"""
            return _line_writer(line=line.replace(old, new), **kwargs)
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
    # test and skip binary files
    if b'\x00' in file_text[:1024]:
        trace.warning("Binary file '%s' skipped.", path)
    if path_prefix and path_prefix != '.':
i = file_text.find(pattern)
b = file_text.rfind(b"\n", 0, i) + 1
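            # Cut file_text down to the start of the line containing the first
            # hit so the line-by-line scan below only walks the remainder.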
if opts.line_number:
                start = file_text.count(b"\n", 0, b) + 1
            file_text = file_text[b:]
            if opts.line_number:
                for index, line in enumerate(file_text.splitlines()):
                    if pattern in line:
                        line = line.decode(file_encoding, 'replace')
                        writeline(lineno=index + start, line=line)
                for line in file_text.splitlines():
                    if pattern in line:
        # standard cases, but perhaps could try and detect backtracking
        # patterns here and avoid whole text search in those cases
        search = opts.patternc.search
        if b"$" not in pattern:
            # GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
            #                through revisions as bazaar returns binary mode
            #                and trailing \r breaks $ as line ending match
m = search(file_text)
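            # Run a single regexp search over the whole text; on a hit, narrow
            # to the line containing the match before scanning line by line.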
                b = file_text.rfind(b"\n", 0, m.start()) + 1
if opts.line_number:
                    start = file_text.count(b"\n", 0, b) + 1
                file_text = file_text[b:]
                for index, line in enumerate(file_text.splitlines()):
line = line.decode(file_encoding, 'replace')
                    writeline(lineno=index + start, line=line)
                for line in file_text.splitlines():
line = line.decode(file_encoding, 'replace')
writeline(line=line)