1
1
# Copyright (C) 2010 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
from __future__ import absolute_import
21
from .lazy_import import lazy_import
22
from ...lazy_import import lazy_import
22
23
lazy_import(globals(), """
23
24
from fnmatch import fnmatch
25
26
from breezy._termcolor import color_string, FG
27
28
from breezy import (
33
revision as _mod_revision,
35
revision as _mod_revision,
38
from .revisionspec import (
40
from breezy.revisionspec import (
40
42
RevisionSpec_revid,
41
43
RevisionSpec_revno,
45
from breezy.sixish import (
51
53
"""Raised when a revision is not on left-hand history."""
54
class GrepOptions(object):
55
"""Container to pass around grep options.
57
This class is used as a container to pass around user option and
58
some other params (like outf) to processing functions. This makes
59
it easier to add more options as grep evolves.
74
files_with_matches = False
75
files_without_match = False
90
56
def _rev_on_mainline(rev_tuple):
91
57
"""returns True is rev tuple is on mainline"""
92
58
if len(rev_tuple) == 1:
157
123
def compile_pattern(pattern, flags=0):
159
return re.compile(pattern, flags)
126
# use python's re.compile as we need to catch re.error in case of bad pattern
127
lazy_regex.reset_compile()
128
patternc = re.compile(pattern, flags)
160
129
except re.error as e:
161
130
raise errors.BzrError("Invalid pattern: '%s'" % pattern)
165
134
def is_fixed_string(s):
166
if re.match("^([A-Za-z0-9_]|\\s)*$", s):
135
if re.match("^([A-Za-z0-9_]|\s)*$", s):
177
146
self.outf = opts.outf
178
147
if opts.show_color:
148
pat = opts.pattern.encode(_user_encoding, 'replace')
179
149
if opts.fixed_string:
180
self._old = opts.pattern
181
self._new = color_string(opts.pattern, FG.BOLD_RED)
151
self._new = color_string(pat, FG.BOLD_RED)
182
152
self.get_writer = self._get_writer_fixed_highlighted
184
154
flags = opts.patternc.flags
185
self._sub = re.compile(
186
opts.pattern.join(("((?:", ")+)")), flags).sub
155
self._sub = re.compile(pat.join(("((?:", ")+)")), flags).sub
187
156
self._highlight = color_string("\\1", FG.BOLD_RED)
188
157
self.get_writer = self._get_writer_regexp_highlighted
234
198
"""Get function for writing output with regexp match highlighted"""
235
199
_line_writer = self._get_writer_plain()
236
200
sub, highlight = self._sub, self._highlight
238
201
def _line_writer_regexp_highlighted(line):
239
202
"""Write formatted line with matched pattern highlighted"""
240
203
return _line_writer(line=sub(highlight, line))
244
207
"""Get function for writing output with search string highlighted"""
245
208
_line_writer = self._get_writer_plain()
246
209
old, new = self._old, self._new
248
210
def _line_writer_fixed_highlighted(line):
249
211
"""Write formatted line with string searched for highlighted"""
250
212
return _line_writer(line=line.replace(old, new))
261
223
# if no revision is specified for diff grep we grep all changesets.
262
224
opts.revision = [RevisionSpec.from_string('revno:1'),
263
RevisionSpec.from_string('last:1')]
225
RevisionSpec.from_string('last:1')]
264
226
start_rev = opts.revision[0]
265
227
start_revid = start_rev.as_revision_id(branch)
266
if start_revid == b'null:':
228
if start_revid == 'null:':
268
230
srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
269
231
if len(opts.revision) == 2:
273
235
end_revno, end_revid = branch.last_revision_info()
274
236
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
276
grep_mainline = (_rev_on_mainline(srevno_tuple)
277
and _rev_on_mainline(erevno_tuple))
238
grep_mainline = (_rev_on_mainline(srevno_tuple) and
239
_rev_on_mainline(erevno_tuple))
279
241
# ensure that we go in reverse order
280
242
if srevno_tuple > erevno_tuple:
285
247
# faster when we don't want to look at merged revs. We try this
286
248
# with _linear_view_revisions. If all revs are to be grepped we
287
249
# use the slower _graph_view_revisions
288
if opts.levels == 1 and grep_mainline:
289
given_revs = _linear_view_revisions(
290
branch, start_revid, end_revid)
250
if opts.levels==1 and grep_mainline:
251
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
292
given_revs = _graph_view_revisions(
293
branch, start_revid, end_revid)
253
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
295
255
# We do an optimization below. For grepping a specific revision
296
256
# We don't need to call _graph_view_revisions which is slow.
300
260
start_rev_tuple = (start_revid, start_revno, 0)
301
261
given_revs = [start_rev_tuple]
302
262
repo = branch.repository
303
diff_pattern = re.compile(
304
b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
305
file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
263
diff_pattern = re.compile("^[+\-].*(" + opts.pattern + ")")
264
file_pattern = re.compile("=== (modified|added|removed) file '.*'", re.UNICODE)
306
265
outputter = _GrepDiffOutputter(opts)
307
266
writeline = outputter.get_writer()
308
267
writerevno = outputter.get_revision_header_writer()
313
272
# with level=1 show only top level
316
rev_spec = RevisionSpec_revid.from_string(
317
"revid:" + revid.decode('utf-8'))
275
rev_spec = RevisionSpec_revid.from_string("revid:"+revid)
318
276
new_rev = repo.get_revision(revid)
319
277
new_tree = rev_spec.as_tree(branch)
320
278
if len(new_rev.parent_ids) == 0:
324
282
old_tree = repo.revision_tree(ancestor_id)
326
284
diff.show_diff_trees(old_tree, new_tree, s,
327
old_label='', new_label='')
285
old_label='', new_label='')
328
286
display_revno = True
329
287
display_file = False
330
288
file_header = None
338
296
writerevno("=== revno:%s ===" % (revno,))
339
297
display_revno = False
342
" %s" % (file_header.decode(file_encoding, 'replace'),))
299
writefileheader(" %s" % (file_header,))
343
300
display_file = False
344
301
line = line.decode(file_encoding, 'replace')
345
302
writeline(" %s" % (line,))
363
320
end_revno, end_revid = branch.last_revision_info()
364
321
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
366
grep_mainline = (_rev_on_mainline(srevno_tuple)
367
and _rev_on_mainline(erevno_tuple))
323
grep_mainline = (_rev_on_mainline(srevno_tuple) and
324
_rev_on_mainline(erevno_tuple))
369
326
# ensure that we go in reverse order
370
327
if srevno_tuple > erevno_tuple:
376
333
# with _linear_view_revisions. If all revs are to be grepped we
377
334
# use the slower _graph_view_revisions
378
335
if opts.levels == 1 and grep_mainline:
379
given_revs = _linear_view_revisions(
380
branch, start_revid, end_revid)
336
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
382
given_revs = _graph_view_revisions(
383
branch, start_revid, end_revid)
338
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
385
340
# We do an optimization below. For grepping a specific revision
386
341
# We don't need to call _graph_view_revisions which is slow.
398
353
# with level=1 show only top level
401
rev = RevisionSpec_revid.from_string(
402
"revid:" + revid.decode('utf-8'))
356
rev = RevisionSpec_revid.from_string("revid:"+revid)
403
357
tree = rev.as_tree(branch)
404
358
for path in opts.path_list:
405
359
tree_path = osutils.pathjoin(relpath, path)
406
360
if not tree.has_filename(tree_path):
407
trace.warning("Skipped unknown file '%s'.", path)
361
trace.warning("Skipped unknown file '%s'." % path)
410
364
if osutils.isdir(path):
411
365
path_prefix = path
412
366
dir_grep(tree, path, relpath, opts, revno, path_prefix)
415
tree, tree_path, '.', path, opts, revno)
368
versioned_file_grep(tree, tree_path, '.', path, opts, revno)
418
371
def workingtree_grep(opts):
419
revno = opts.print_revno = None # for working tree set revno to None
372
revno = opts.print_revno = None # for working tree set revno to None
421
374
tree, branch, relpath = \
422
375
controldir.ControlDir.open_containing_tree_or_branch('.')
424
377
msg = ('Cannot search working tree. Working tree not found.\n'
425
'To search for specific revision in history use the -r option.')
378
'To search for specific revision in history use the -r option.')
426
379
raise errors.BzrCommandError(msg)
428
381
# GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
434
387
path_prefix = path
435
388
dir_grep(tree, path, relpath, opts, revno, path_prefix)
437
with open(path, 'rb') as f:
438
_file_grep(f.read(), path, opts, revno)
390
_file_grep(open(path).read(), path, opts, revno)
441
393
def _skip_file(include, exclude, path):
464
416
# and hits manually refilled. Could do this again if it was
465
417
# for a good reason, otherwise cache might want purging.
466
418
outputter = opts.outputter
467
for fp, fc, fkind, entry in tree.list_files(
468
include_root=False, from_dir=from_dir, recursive=opts.recursive):
419
for fp, fc, fkind, fid, entry in tree.list_files(include_root=False,
420
from_dir=from_dir, recursive=opts.recursive):
470
422
if _skip_file(opts.include, opts.exclude, fp):
476
428
# If old result is valid, print results immediately.
477
429
# Otherwise, add file info to to_grep so that the
478
430
# loop later will get chunks and grep them
479
cache_id = tree.get_file_revision(tree_path)
431
cache_id = tree.get_file_revision(tree_path, fid)
480
432
if cache_id in outputter.cache:
481
433
# GZ 2010-06-05: Not really sure caching and re-outputting
482
434
# the old path is really the right thing,
493
445
if opts.files_with_matches or opts.files_without_match:
494
446
# Optimize for wtree list-only as we don't need to read the
496
with open(path_for_file, 'rb', buffering=4096) as file:
497
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
448
file = open(path_for_file, 'r', buffering=4096)
449
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
499
with open(path_for_file, 'rb') as f:
500
_file_grep(f.read(), fp, opts, revno, path_prefix)
451
file_text = open(path_for_file, 'r').read()
452
_file_grep(file_text, fp, opts, revno, path_prefix)
502
if revno is not None: # grep versioned files
454
if revno is not None: # grep versioned files
503
455
for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
504
456
path = _make_display_path(relpath, path)
505
_file_grep(b''.join(chunks), path, opts, revno, path_prefix,
506
tree.get_file_revision(tree_path))
457
_file_grep(''.join(chunks), path, opts, revno, path_prefix,
458
tree.get_file_revision(tree_path))
509
461
def _make_display_path(relpath, path):
524
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
476
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix = None):
525
477
"""Create a file object for the specified id and pass it on to _file_grep.
540
492
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
541
493
# test and skip binary files
542
if b'\x00' in file.read(1024):
494
if '\x00' in file.read(1024):
544
trace.warning("Binary file '%s' skipped.", path)
496
trace.warning("Binary file '%s' skipped." % path)
547
file.seek(0) # search from beginning
499
file.seek(0) # search from beginning
550
502
if opts.fixed_string:
553
505
if pattern in line:
556
else: # not fixed_string
508
else: # not fixed_string
557
509
for line in file:
558
510
if opts.patternc.search(line):
562
514
if (opts.files_with_matches and found) or \
563
(opts.files_without_match and not found):
515
(opts.files_without_match and not found):
564
516
if path_prefix and path_prefix != '.':
565
517
# user has passed a dir arg, show that as result prefix
566
518
path = osutils.pathjoin(path_prefix, path)
573
525
The idea here is to do this work only once per run, and finally return a
574
526
function that will do the minimum amount possible for each match.
577
528
def __init__(self, opts, use_cache=False):
578
529
self.outf = opts.outf
588
539
no_line = opts.files_with_matches or opts.files_without_match
590
541
if opts.show_color:
542
pat = opts.pattern.encode(_user_encoding, 'replace')
592
544
self.get_writer = self._get_writer_plain
593
545
elif opts.fixed_string:
594
self._old = opts.pattern
595
self._new = color_string(opts.pattern, FG.BOLD_RED)
547
self._new = color_string(pat, FG.BOLD_RED)
596
548
self.get_writer = self._get_writer_fixed_highlighted
598
550
flags = opts.patternc.flags
599
self._sub = re.compile(
600
opts.pattern.join(("((?:", ")+)")), flags).sub
551
self._sub = re.compile(pat.join(("((?:", ")+)")), flags).sub
601
552
self._highlight = color_string("\\1", FG.BOLD_RED)
602
553
self.get_writer = self._get_writer_regexp_highlighted
603
554
path_start = FG.MAGENTA
628
579
def _get_writer_plain(self, path, revno, cache_id):
629
580
"""Get function for writing uncoloured output"""
630
581
per_line = self._format_perline
631
start = self._format_initial % {"path": path, "revno": revno}
582
start = self._format_initial % {"path":path, "revno":revno}
632
583
write = self.outf.write
633
584
if self.cache is not None and cache_id is not None:
635
586
self.cache[cache_id] = path, result_list
636
587
add_to_cache = result_list.append
638
588
def _line_cache_and_writer(**kwargs):
639
589
"""Write formatted line and cache arguments"""
640
590
end = per_line % kwargs
641
591
add_to_cache(end)
642
592
write(start + end)
643
593
return _line_cache_and_writer
645
594
def _line_writer(**kwargs):
646
595
"""Write formatted line from arguments given by underlying opts"""
647
596
write(start + per_line % kwargs)
650
599
def write_cached_lines(self, cache_id, revno):
651
600
"""Write cached results out again for new revision"""
652
601
cached_path, cached_matches = self.cache[cache_id]
653
start = self._format_initial % {"path": cached_path, "revno": revno}
602
start = self._format_initial % {"path":cached_path, "revno":revno}
654
603
write = self.outf.write
655
604
for end in cached_matches:
656
605
write(start + end)
659
608
"""Get function for writing output with regexp match highlighted"""
660
609
_line_writer = self._get_writer_plain(path, revno, cache_id)
661
610
sub, highlight = self._sub, self._highlight
663
611
def _line_writer_regexp_highlighted(line, **kwargs):
664
612
"""Write formatted line with matched pattern highlighted"""
665
613
return _line_writer(line=sub(highlight, line), **kwargs)
669
617
"""Get function for writing output with search string highlighted"""
670
618
_line_writer = self._get_writer_plain(path, revno, cache_id)
671
619
old, new = self._old, self._new
673
620
def _line_writer_fixed_highlighted(line, **kwargs):
674
621
"""Write formatted line with string searched for highlighted"""
675
622
return _line_writer(line=line.replace(old, new), **kwargs)
679
626
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
680
627
# test and skip binary files
681
if b'\x00' in file_text[:1024]:
628
if '\x00' in file_text[:1024]:
683
trace.warning("Binary file '%s' skipped.", path)
630
trace.warning("Binary file '%s' skipped." % path)
686
633
if path_prefix and path_prefix != '.':
718
665
i = file_text.find(pattern)
721
b = file_text.rfind(b"\n", 0, i) + 1
668
b = file_text.rfind("\n", 0, i) + 1
722
669
if opts.line_number:
723
start = file_text.count(b"\n", 0, b) + 1
670
start = file_text.count("\n", 0, b) + 1
724
671
file_text = file_text[b:]
725
672
if opts.line_number:
726
673
for index, line in enumerate(file_text.splitlines()):
727
674
if pattern in line:
728
675
line = line.decode(file_encoding, 'replace')
729
writeline(lineno=index + start, line=line)
676
writeline(lineno=index+start, line=line)
731
678
for line in file_text.splitlines():
732
679
if pattern in line:
737
684
# standard cases, but perhaps could try and detect backtracking
738
685
# patterns here and avoid whole text search in those cases
739
686
search = opts.patternc.search
740
if b"$" not in pattern:
687
if "$" not in pattern:
741
688
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
742
689
# through revisions as bazaar returns binary mode
743
690
# and trailing \r breaks $ as line ending match
744
691
m = search(file_text)
747
b = file_text.rfind(b"\n", 0, m.start()) + 1
694
b = file_text.rfind("\n", 0, m.start()) + 1
748
695
if opts.line_number:
749
start = file_text.count(b"\n", 0, b) + 1
696
start = file_text.count("\n", 0, b) + 1
750
697
file_text = file_text[b:]
754
701
for index, line in enumerate(file_text.splitlines()):
756
703
line = line.decode(file_encoding, 'replace')
757
writeline(lineno=index + start, line=line)
704
writeline(lineno=index+start, line=line)
759
706
for line in file_text.splitlines():
761
708
line = line.decode(file_encoding, 'replace')
762
709
writeline(line=line)