13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from io import BytesIO
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from __future__ import absolute_import
20
from .lazy_import import lazy_import
22
from ...lazy_import import lazy_import
21
23
lazy_import(globals(), """
22
24
from fnmatch import fnmatch
24
26
from breezy._termcolor import color_string, FG
28
from breezy.revisionspec import (
26
33
from breezy import (
38
revision as _mod_revision,
34
revision as _mod_revision,
37
from .revisionspec import (
45
from breezy.sixish import (
43
49
_user_encoding = osutils.get_user_encoding()
47
53
"""Raised when a revision is not on left-hand history."""
50
class GrepOptions(object):
51
"""Container to pass around grep options.
53
This class is used as a container to pass around user option and
54
some other params (like outf) to processing functions. This makes
55
it easier to add more options as grep evolves.
70
files_with_matches = False
71
files_without_match = False
86
56
def _rev_on_mainline(rev_tuple):
87
57
"""returns True is rev tuple is on mainline"""
88
58
if len(rev_tuple) == 1:
153
123
def compile_pattern(pattern, flags=0):
155
return re.compile(pattern, flags)
126
# use python's re.compile as we need to catch re.error in case of bad pattern
127
lazy_regex.reset_compile()
128
patternc = re.compile(pattern, flags)
156
129
except re.error as e:
157
130
raise errors.BzrError("Invalid pattern: '%s'" % pattern)
161
134
def is_fixed_string(s):
162
if re.match("^([A-Za-z0-9_]|\\s)*$", s):
135
if re.match("^([A-Za-z0-9_]|\s)*$", s):
173
146
self.outf = opts.outf
174
147
if opts.show_color:
148
pat = opts.pattern.encode(_user_encoding, 'replace')
175
149
if opts.fixed_string:
176
self._old = opts.pattern
177
self._new = color_string(opts.pattern, FG.BOLD_RED)
151
self._new = color_string(pat, FG.BOLD_RED)
178
152
self.get_writer = self._get_writer_fixed_highlighted
180
154
flags = opts.patternc.flags
181
self._sub = re.compile(
182
opts.pattern.join(("((?:", ")+)")), flags).sub
155
self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
183
156
self._highlight = color_string("\\1", FG.BOLD_RED)
184
157
self.get_writer = self._get_writer_regexp_highlighted
230
198
"""Get function for writing output with regexp match highlighted"""
231
199
_line_writer = self._get_writer_plain()
232
200
sub, highlight = self._sub, self._highlight
234
201
def _line_writer_regexp_highlighted(line):
235
202
"""Write formatted line with matched pattern highlighted"""
236
203
return _line_writer(line=sub(highlight, line))
240
207
"""Get function for writing output with search string highlighted"""
241
208
_line_writer = self._get_writer_plain()
242
209
old, new = self._old, self._new
244
210
def _line_writer_fixed_highlighted(line):
245
211
"""Write formatted line with string searched for highlighted"""
246
212
return _line_writer(line=line.replace(old, new))
250
216
def grep_diff(opts):
251
217
wt, branch, relpath = \
252
218
controldir.ControlDir.open_containing_tree_or_branch('.')
253
with branch.lock_read():
254
221
if opts.revision:
255
222
start_rev = opts.revision[0]
257
224
# if no revision is sepcified for diff grep we grep all changesets.
258
225
opts.revision = [RevisionSpec.from_string('revno:1'),
259
RevisionSpec.from_string('last:1')]
226
RevisionSpec.from_string('last:1')]
260
227
start_rev = opts.revision[0]
261
228
start_revid = start_rev.as_revision_id(branch)
262
if start_revid == b'null:':
229
if start_revid == 'null:':
264
231
srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
265
232
if len(opts.revision) == 2:
269
236
end_revno, end_revid = branch.last_revision_info()
270
237
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
272
grep_mainline = (_rev_on_mainline(srevno_tuple)
273
and _rev_on_mainline(erevno_tuple))
239
grep_mainline = (_rev_on_mainline(srevno_tuple) and
240
_rev_on_mainline(erevno_tuple))
275
242
# ensure that we go in reverse order
276
243
if srevno_tuple > erevno_tuple:
281
248
# faster when we don't want to look at merged revs. We try this
282
249
# with _linear_view_revisions. If all revs are to be grepped we
283
250
# use the slower _graph_view_revisions
284
if opts.levels == 1 and grep_mainline:
285
given_revs = _linear_view_revisions(
286
branch, start_revid, end_revid)
251
if opts.levels==1 and grep_mainline:
252
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
288
given_revs = _graph_view_revisions(
289
branch, start_revid, end_revid)
254
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
291
256
# We do an optimization below. For grepping a specific revison
292
257
# We don't need to call _graph_view_revisions which is slow.
296
261
start_rev_tuple = (start_revid, start_revno, 0)
297
262
given_revs = [start_rev_tuple]
298
263
repo = branch.repository
299
diff_pattern = re.compile(
300
b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
301
file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
264
diff_pattern = re.compile("^[+\-].*(" + opts.pattern + ")")
265
file_pattern = re.compile("=== (modified|added|removed) file '.*'", re.UNICODE)
302
266
outputter = _GrepDiffOutputter(opts)
303
267
writeline = outputter.get_writer()
304
268
writerevno = outputter.get_revision_header_writer()
309
273
# with level=1 show only top level
312
rev_spec = RevisionSpec_revid.from_string(
313
"revid:" + revid.decode('utf-8'))
276
rev_spec = RevisionSpec_revid.from_string("revid:"+revid)
314
277
new_rev = repo.get_revision(revid)
315
278
new_tree = rev_spec.as_tree(branch)
316
279
if len(new_rev.parent_ids) == 0:
334
297
writerevno("=== revno:%s ===" % (revno,))
335
298
display_revno = False
338
" %s" % (file_header.decode(file_encoding, 'replace'),))
300
writefileheader(" %s" % (file_header,))
339
301
display_file = False
340
302
line = line.decode(file_encoding, 'replace')
341
303
writeline(" %s" % (line,))
344
308
def versioned_grep(opts):
345
309
wt, branch, relpath = \
346
310
controldir.ControlDir.open_containing_tree_or_branch('.')
347
with branch.lock_read():
348
313
start_rev = opts.revision[0]
349
314
start_revid = start_rev.as_revision_id(branch)
350
315
if start_revid is None:
359
324
end_revno, end_revid = branch.last_revision_info()
360
325
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
362
grep_mainline = (_rev_on_mainline(srevno_tuple)
363
and _rev_on_mainline(erevno_tuple))
327
grep_mainline = (_rev_on_mainline(srevno_tuple) and
328
_rev_on_mainline(erevno_tuple))
365
330
# ensure that we go in reverse order
366
331
if srevno_tuple > erevno_tuple:
372
337
# with _linear_view_revisions. If all revs are to be grepped we
373
338
# use the slower _graph_view_revisions
374
339
if opts.levels == 1 and grep_mainline:
375
given_revs = _linear_view_revisions(
376
branch, start_revid, end_revid)
340
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
378
given_revs = _graph_view_revisions(
379
branch, start_revid, end_revid)
342
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
381
344
# We do an optimization below. For grepping a specific revison
382
345
# We don't need to call _graph_view_revisions which is slow.
394
357
# with level=1 show only top level
397
rev = RevisionSpec_revid.from_string(
398
"revid:" + revid.decode('utf-8'))
360
rev = RevisionSpec_revid.from_string("revid:"+revid)
399
361
tree = rev.as_tree(branch)
400
362
for path in opts.path_list:
401
tree_path = osutils.pathjoin(relpath, path)
402
if not tree.has_filename(tree_path):
403
trace.warning("Skipped unknown file '%s'.", path)
363
path_for_id = osutils.pathjoin(relpath, path)
364
id = tree.path2id(path_for_id)
366
trace.warning("Skipped unknown file '%s'." % path)
406
369
if osutils.isdir(path):
407
370
path_prefix = path
408
371
dir_grep(tree, path, relpath, opts, revno, path_prefix)
411
tree, tree_path, '.', path, opts, revno)
373
versioned_file_grep(tree, id, '.', path, opts, revno)
414
378
def workingtree_grep(opts):
415
revno = opts.print_revno = None # for working tree set revno to None
379
revno = opts.print_revno = None # for working tree set revno to None
417
381
tree, branch, relpath = \
418
382
controldir.ControlDir.open_containing_tree_or_branch('.')
420
384
msg = ('Cannot search working tree. Working tree not found.\n'
421
'To search for specific revision in history use the -r option.')
422
raise errors.CommandError(msg)
385
'To search for specific revision in history use the -r option.')
386
raise errors.BzrCommandError(msg)
424
388
# GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
425
389
opts.outputter = _Outputter(opts)
427
with tree.lock_read():
428
393
for path in opts.path_list:
429
394
if osutils.isdir(path):
430
395
path_prefix = path
431
396
dir_grep(tree, path, relpath, opts, revno, path_prefix)
433
with open(path, 'rb') as f:
434
_file_grep(f.read(), path, opts, revno)
398
_file_grep(open(path).read(), path, opts, revno)
437
403
def _skip_file(include, exclude, path):
446
412
# setup relpath to open files relative to cwd
449
rpath = osutils.pathjoin('..', relpath)
415
rpath = osutils.pathjoin('..',relpath)
451
417
from_dir = osutils.pathjoin(relpath, path)
452
418
if opts.from_root:
453
419
# start searching recursively from root
458
424
to_grep_append = to_grep.append
460
426
# and hits manually refilled. Could do this again if it was
461
427
# for a good reason, otherwise cache might want purging.
462
428
outputter = opts.outputter
463
for fp, fc, fkind, entry in tree.list_files(
464
include_root=False, from_dir=from_dir, recursive=opts.recursive):
429
for fp, fc, fkind, fid, entry in tree.list_files(include_root=False,
430
from_dir=from_dir, recursive=opts.recursive):
466
432
if _skip_file(opts.include, opts.exclude, fp):
469
435
if fc == 'V' and fkind == 'file':
470
tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
471
if revno is not None:
472
437
# If old result is valid, print results immediately.
473
438
# Otherwise, add file info to to_grep so that the
474
439
# loop later will get chunks and grep them
475
cache_id = tree.get_file_revision(tree_path)
440
cache_id = tree.get_file_revision(fid)
476
441
if cache_id in outputter.cache:
477
442
# GZ 2010-06-05: Not really sure caching and re-outputting
478
443
# the old path is really the right thing,
479
444
# but it's what the old code seemed to do
480
445
outputter.write_cached_lines(cache_id, revno)
482
to_grep_append((tree_path, (fp, tree_path)))
447
to_grep_append((fid, (fp, fid)))
484
449
# we are grepping working tree.
485
450
if from_dir is None:
489
454
if opts.files_with_matches or opts.files_without_match:
490
455
# Optimize for wtree list-only as we don't need to read the
492
with open(path_for_file, 'rb', buffering=4096) as file:
493
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
457
file = open(path_for_file, 'r', buffering=4096)
458
_file_grep_list_only_wtree(file, fp, opts, path_prefix)
495
with open(path_for_file, 'rb') as f:
496
_file_grep(f.read(), fp, opts, revno, path_prefix)
460
file_text = open(path_for_file, 'r').read()
461
_file_grep(file_text, fp, opts, revno, path_prefix)
498
if revno is not None: # grep versioned files
499
for (path, tree_path), chunks in tree.iter_files_bytes(to_grep):
463
if revno != None: # grep versioned files
464
for (path, fid), chunks in tree.iter_files_bytes(to_grep):
500
465
path = _make_display_path(relpath, path)
501
_file_grep(b''.join(chunks), path, opts, revno, path_prefix,
502
tree.get_file_revision(tree_path))
466
_file_grep(chunks[0], path, opts, revno, path_prefix,
467
tree.get_file_revision(fid, path))
505
470
def _make_display_path(relpath, path):
520
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
485
def versioned_file_grep(tree, id, relpath, path, opts, revno, path_prefix = None):
521
486
"""Create a file object for the specified id and pass it on to _file_grep.
524
489
path = _make_display_path(relpath, path)
525
file_text = tree.get_file_text(tree_path)
490
file_text = tree.get_file_text(id)
526
491
_file_grep(file_text, path, opts, revno, path_prefix)
536
501
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
537
502
# test and skip binary files
538
if b'\x00' in file.read(1024):
503
if '\x00' in file.read(1024):
540
trace.warning("Binary file '%s' skipped.", path)
505
trace.warning("Binary file '%s' skipped." % path)
543
file.seek(0) # search from beginning
508
file.seek(0) # search from beginning
546
511
if opts.fixed_string:
549
514
if pattern in line:
552
else: # not fixed_string
517
else: # not fixed_string
553
518
for line in file:
554
519
if opts.patternc.search(line):
558
523
if (opts.files_with_matches and found) or \
559
(opts.files_without_match and not found):
524
(opts.files_without_match and not found):
560
525
if path_prefix and path_prefix != '.':
561
526
# user has passed a dir arg, show that as result prefix
562
527
path = osutils.pathjoin(path_prefix, path)
584
548
no_line = opts.files_with_matches or opts.files_without_match
586
550
if opts.show_color:
551
pat = opts.pattern.encode(_user_encoding, 'replace')
588
553
self.get_writer = self._get_writer_plain
589
554
elif opts.fixed_string:
590
self._old = opts.pattern
591
self._new = color_string(opts.pattern, FG.BOLD_RED)
556
self._new = color_string(pat, FG.BOLD_RED)
592
557
self.get_writer = self._get_writer_fixed_highlighted
594
559
flags = opts.patternc.flags
595
self._sub = re.compile(
596
opts.pattern.join(("((?:", ")+)")), flags).sub
560
self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
597
561
self._highlight = color_string("\\1", FG.BOLD_RED)
598
562
self.get_writer = self._get_writer_regexp_highlighted
599
563
path_start = FG.MAGENTA
624
588
def _get_writer_plain(self, path, revno, cache_id):
625
589
"""Get function for writing uncoloured output"""
626
590
per_line = self._format_perline
627
start = self._format_initial % {"path": path, "revno": revno}
591
start = self._format_initial % {"path":path, "revno":revno}
628
592
write = self.outf.write
629
593
if self.cache is not None and cache_id is not None:
631
595
self.cache[cache_id] = path, result_list
632
596
add_to_cache = result_list.append
634
597
def _line_cache_and_writer(**kwargs):
635
598
"""Write formatted line and cache arguments"""
636
599
end = per_line % kwargs
637
600
add_to_cache(end)
638
601
write(start + end)
639
602
return _line_cache_and_writer
641
603
def _line_writer(**kwargs):
642
604
"""Write formatted line from arguments given by underlying opts"""
643
605
write(start + per_line % kwargs)
646
608
def write_cached_lines(self, cache_id, revno):
647
609
"""Write cached results out again for new revision"""
648
610
cached_path, cached_matches = self.cache[cache_id]
649
start = self._format_initial % {"path": cached_path, "revno": revno}
611
start = self._format_initial % {"path":cached_path, "revno":revno}
650
612
write = self.outf.write
651
613
for end in cached_matches:
652
614
write(start + end)
655
617
"""Get function for writing output with regexp match highlighted"""
656
618
_line_writer = self._get_writer_plain(path, revno, cache_id)
657
619
sub, highlight = self._sub, self._highlight
659
620
def _line_writer_regexp_highlighted(line, **kwargs):
660
621
"""Write formatted line with matched pattern highlighted"""
661
622
return _line_writer(line=sub(highlight, line), **kwargs)
665
626
"""Get function for writing output with search string highlighted"""
666
627
_line_writer = self._get_writer_plain(path, revno, cache_id)
667
628
old, new = self._old, self._new
669
629
def _line_writer_fixed_highlighted(line, **kwargs):
670
630
"""Write formatted line with string searched for highlighted"""
671
631
return _line_writer(line=line.replace(old, new), **kwargs)
675
635
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
676
636
# test and skip binary files
677
if b'\x00' in file_text[:1024]:
637
if '\x00' in file_text[:1024]:
679
trace.warning("Binary file '%s' skipped.", path)
639
trace.warning("Binary file '%s' skipped." % path)
682
642
if path_prefix and path_prefix != '.':
714
674
i = file_text.find(pattern)
717
b = file_text.rfind(b"\n", 0, i) + 1
677
b = file_text.rfind("\n", 0, i) + 1
718
678
if opts.line_number:
719
start = file_text.count(b"\n", 0, b) + 1
679
start = file_text.count("\n", 0, b) + 1
720
680
file_text = file_text[b:]
721
681
if opts.line_number:
722
682
for index, line in enumerate(file_text.splitlines()):
723
683
if pattern in line:
724
684
line = line.decode(file_encoding, 'replace')
725
writeline(lineno=index + start, line=line)
685
writeline(lineno=index+start, line=line)
727
687
for line in file_text.splitlines():
728
688
if pattern in line:
733
693
# standard cases, but perhaps could try and detect backtracking
734
694
# patterns here and avoid whole text search in those cases
735
695
search = opts.patternc.search
736
if b"$" not in pattern:
696
if "$" not in pattern:
737
697
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
738
698
# through revisions as bazaar returns binary mode
739
699
# and trailing \r breaks $ as line ending match
740
700
m = search(file_text)
743
b = file_text.rfind(b"\n", 0, m.start()) + 1
703
b = file_text.rfind("\n", 0, m.start()) + 1
744
704
if opts.line_number:
745
start = file_text.count(b"\n", 0, b) + 1
705
start = file_text.count("\n", 0, b) + 1
746
706
file_text = file_text[b:]
750
710
for index, line in enumerate(file_text.splitlines()):
752
712
line = line.decode(file_encoding, 'replace')
753
writeline(lineno=index + start, line=line)
713
writeline(lineno=index+start, line=line)
755
715
for line in file_text.splitlines():
757
717
line = line.decode(file_encoding, 'replace')
758
718
writeline(line=line)