/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.40.10 by Parth Malwankar
assigned copyright to canonical
1
# Copyright (C) 2010 Canonical Ltd
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.40.147 by Jelmer Vernooij
Fix compatibility with newer versions of bzr: don't use relative imports in lazy imports, and import features from bzrlib.tests.features.
17
from __future__ import absolute_import
18
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
19
import re
0.47.1 by Martin
Implement whole text search for fast failure on no match
20
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
21
from ...lazy_import import lazy_import
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
22
lazy_import(globals(), """
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
23
from fnmatch import fnmatch
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
24
6667.2.1 by Jelmer Vernooij
Some cleanup; s/BzrDir/ControlDir/, remove some unused imports.
25
from breezy._termcolor import color_string, FG
0.43.4 by Parth Malwankar
initial support for color for fixed string grep.
26
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
27
from breezy import (
6667.2.1 by Jelmer Vernooij
Some cleanup; s/BzrDir/ControlDir/, remove some unused imports.
28
    controldir,
0.48.5 by Parth Malwankar
fixed imports
29
    diff,
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
30
    errors,
31
    lazy_regex,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
32
    revision as _mod_revision,
33
    )
34
""")
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
35
from breezy import (
0.40.47 by Parth Malwankar
fixes bug #531336. binary files are now skipped.
36
    osutils,
37
    trace,
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
38
    )
6800.1.5 by Jelmer Vernooij
Fix more imports.
39
from breezy.revisionspec import (
40
    RevisionSpec,
41
    RevisionSpec_revid,
42
    RevisionSpec_revno,
43
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
44
from breezy.sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
45
    BytesIO,
46
    )
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
47
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
48
# Encoding reported by osutils.get_user_encoding(), looked up once at import
# time. grep_diff uses it to encode the search pattern and to decode diff
# text before writing it out.
_user_encoding = osutils.get_user_encoding()
49
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
50
0.40.95 by Parth Malwankar
faster mainline rev grep
51
class _RevisionNotLinear(Exception):
52
    """Raised when a revision is not on left-hand history."""
53
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
54
0.40.95 by Parth Malwankar
faster mainline rev grep
55
def _rev_on_mainline(rev_tuple):
56
    """returns True is rev tuple is on mainline"""
57
    if len(rev_tuple) == 1:
58
        return True
59
    return rev_tuple[1] == 0 and rev_tuple[2] == 0
60
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
61
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
62
# NOTE: _linear_view_revisions is based on
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
63
# breezy.log._linear_view_revisions.
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
64
# This should probably be a common public API
0.40.95 by Parth Malwankar
faster mainline rev grep
65
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Walk the left-hand ancestry from end_rev_id back to start_rev_id.

    Requires that start_rev_id is older than end_rev_id.

    :param branch: the branch
    :param start_rev_id: revision-id at which the walk stops (inclusive)
    :param end_rev_id: revision-id at which the walk begins
    :return: An iterator of (revision_id, dotted_revno, 0) tuples; the
        merge depth slot is always 0.
    """
    ancestry = branch.repository.get_graph().iter_lefthand_ancestry(
        end_rev_id, (_mod_revision.NULL_REVISION,))
    for rev_id in ancestry:
        dotted = branch.revision_id_to_dotted_revno(rev_id)
        yield rev_id, '.'.join(map(str, dotted)), 0
        if rev_id == start_rev_id:
            # The start revision is included in the output, then we stop.
            break
77
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
78
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
79
# NOTE: _graph_view_revisions is copied from
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
80
# breezy.log._graph_view_revisions.
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
81
# This should probably be a common public API
82
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
83
                          rebase_initial_depths=True):
84
    """Calculate revisions to view including merges, newest to oldest.
85
86
    :param branch: the branch
87
    :param start_rev_id: the lower revision-id
88
    :param end_rev_id: the upper revision-id
89
    :param rebase_initial_depth: should depths be rebased until a mainline
90
      revision is found?
91
    :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
92
    """
0.40.106 by Parth Malwankar
fixed error in dotted rev reverse search.
93
    # requires that start is older than end
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
94
    view_revisions = branch.iter_merge_sorted_revisions(
95
        start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
96
        stop_rule="with-merges")
97
    if not rebase_initial_depths:
98
        for (rev_id, merge_depth, revno, end_of_merge
99
             ) in view_revisions:
100
            yield rev_id, '.'.join(map(str, revno)), merge_depth
101
    else:
102
        # We're following a development line starting at a merged revision.
103
        # We need to adjust depths down by the initial depth until we find
104
        # a depth less than it. Then we use that depth as the adjustment.
105
        # If and when we reach the mainline, depth adjustment ends.
106
        depth_adjustment = None
107
        for (rev_id, merge_depth, revno, end_of_merge
108
             ) in view_revisions:
109
            if depth_adjustment is None:
110
                depth_adjustment = merge_depth
111
            if depth_adjustment:
112
                if merge_depth < depth_adjustment:
113
                    # From now on we reduce the depth adjustement, this can be
114
                    # surprising for users. The alternative requires two passes
115
                    # which breaks the fast display of the first revision
116
                    # though.
117
                    depth_adjustment = merge_depth
118
                merge_depth -= depth_adjustment
119
            yield rev_id, '.'.join(map(str, revno)), merge_depth
120
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
121
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
122
def compile_pattern(pattern, flags=0):
    """Compile a search pattern, reporting bad patterns as a BzrError.

    :param pattern: the regular expression source
    :param flags: ``re`` flags passed through to ``re.compile``
    :return: the compiled pattern object
    :raises errors.BzrError: if ``re.compile`` rejects the pattern
    """
    try:
        # Use python's re.compile (after resetting lazy_regex) because we
        # need to catch re.error in case of a bad pattern.
        lazy_regex.reset_compile()
        return re.compile(pattern, flags)
    except re.error:
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
131
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
132
0.40.86 by Parth Malwankar
the check for implicit fixed_string now allows for spaces.
133
def is_fixed_string(s):
    """Return True if ``s`` holds only letters A-Z/a-z, digits, '_' or whitespace.

    Such a string (including the empty string) contains no regular
    expression metacharacters.
    """
    return re.match("^([A-Za-z0-9_]|\\s)*$", s) is not None
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
137
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
138
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
139
class _GrepDiffOutputter(object):
140
    """Precalculate formatting based on options given for diff grep.
141
    """
6531.3.8 by Jelmer Vernooij
Move color feature into bzrlib.tests.features.
142
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
143
    def __init__(self, opts):
0.48.8 by Parth Malwankar
colored header for diff grep output
144
        self.opts = opts
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
145
        self.outf = opts.outf
146
        if opts.show_color:
147
            if opts.fixed_string:
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
148
                self._old = opts.pattern
149
                self._new = color_string(opts.pattern, FG.BOLD_RED)
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
150
                self.get_writer = self._get_writer_fixed_highlighted
151
            else:
152
                flags = opts.patternc.flags
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
153
                self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
154
                self._highlight = color_string("\\1", FG.BOLD_RED)
155
                self.get_writer = self._get_writer_regexp_highlighted
156
        else:
157
            self.get_writer = self._get_writer_plain
158
0.48.8 by Parth Malwankar
colored header for diff grep output
159
    def get_file_header_writer(self):
160
        """Get function for writing file headers"""
161
        write = self.outf.write
162
        eol_marker = self.opts.eol_marker
163
        def _line_writer(line):
164
            write(line + eol_marker)
165
        def _line_writer_color(line):
166
            write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
167
        if self.opts.show_color:
168
            return _line_writer_color
169
        else:
170
            return _line_writer
171
        return _line_writer
172
173
    def get_revision_header_writer(self):
174
        """Get function for writing revno lines"""
175
        write = self.outf.write
176
        eol_marker = self.opts.eol_marker
177
        def _line_writer(line):
178
            write(line + eol_marker)
179
        def _line_writer_color(line):
180
            write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
181
        if self.opts.show_color:
182
            return _line_writer_color
183
        else:
184
            return _line_writer
185
        return _line_writer
186
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
187
    def _get_writer_plain(self):
188
        """Get function for writing uncoloured output"""
189
        write = self.outf.write
0.48.8 by Parth Malwankar
colored header for diff grep output
190
        eol_marker = self.opts.eol_marker
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
191
        def _line_writer(line):
0.48.8 by Parth Malwankar
colored header for diff grep output
192
            write(line + eol_marker)
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
193
        return _line_writer
194
195
    def _get_writer_regexp_highlighted(self):
196
        """Get function for writing output with regexp match highlighted"""
197
        _line_writer = self._get_writer_plain()
198
        sub, highlight = self._sub, self._highlight
199
        def _line_writer_regexp_highlighted(line):
200
            """Write formatted line with matched pattern highlighted"""
201
            return _line_writer(line=sub(highlight, line))
202
        return _line_writer_regexp_highlighted
203
204
    def _get_writer_fixed_highlighted(self):
205
        """Get function for writing output with search string highlighted"""
206
        _line_writer = self._get_writer_plain()
207
        old, new = self._old, self._new
208
        def _line_writer_fixed_highlighted(line):
209
            """Write formatted line with string searched for highlighted"""
210
            return _line_writer(line=line.replace(old, new))
211
        return _line_writer_fixed_highlighted
212
213
0.48.2 by Parth Malwankar
intermediate checkin. we now show diff with -p option.
214
def grep_diff(opts):
    """Grep the diffs introduced by revisions of the branch containing '.'.

    For every selected revision the diff against its first parent (or the
    null revision when it has no parents) is produced, and each line that
    starts with '+' or '-' and contains ``opts.pattern`` is written out,
    preceded once by a revno header and once per file by the file header.

    If ``opts.revision`` is empty it is set to the range revno:1..last:1.
    Nothing is written when the start revision resolves to ``b'null:'``.

    :param opts: options object; ``revision``, ``levels``, ``pattern`` and
        the attributes used by _GrepDiffOutputter are read from it.
    """
    _tree, branch, _relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if not opts.revision:
            # If no revision is specified for diff grep we grep all
            # changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
        first_spec = opts.revision[0]
        start_revid = first_spec.as_revision_id(branch)
        if start_revid == b'null:':
            return
        start_dotted = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) != 2:
            # Optimization: a single specific revision does not need the
            # slow _graph_view_revisions; build its tuple directly.
            # _graph_view_revisions is used only for revision ranges.
            given_revs = [
                (start_revid, '.'.join(map(str, start_dotted)), 0)]
        else:
            second_spec = opts.revision[1]
            end_revid = second_spec.as_revision_id(branch)
            if end_revid is None:
                _last_revno, end_revid = branch.last_revision_info()
            end_dotted = branch.revision_id_to_dotted_revno(end_revid)

            both_mainline = (_rev_on_mainline(start_dotted) and
                             _rev_on_mainline(end_dotted))

            # Ensure that we go in reverse order.
            if start_dotted > end_dotted:
                start_dotted, end_dotted = end_dotted, start_dotted
                start_revid, end_revid = end_revid, start_revid

            # Optimization: traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs, so use
            # _linear_view_revisions for that. If all revs are to be grepped
            # fall back to the slower _graph_view_revisions.
            if opts.levels == 1 and both_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        repo = branch.repository
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # With level=1 show only top level.
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if new_rev.parent_ids:
                ancestor_id = new_rev.parent_ids[0]
            else:
                ancestor_id = _mod_revision.NULL_REVISION
            old_tree = repo.revision_tree(ancestor_id)
            diff_buf = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, diff_buf,
                                 old_label='', new_label='')
            revno_pending = True
            # Most recent file header seen but not yet written; None once
            # it has been written (or before any header is seen).
            pending_header = None
            for raw_line in diff_buf.getvalue().splitlines():
                if file_pattern.search(raw_line):
                    pending_header = raw_line
                    continue
                if not diff_pattern.search(raw_line):
                    continue
                if revno_pending:
                    writerevno("=== revno:%s ===" % (revno,))
                    revno_pending = False
                if pending_header is not None:
                    writefileheader("  %s" % (
                        pending_header.decode(file_encoding, 'replace'),))
                    pending_header = None
                writeline("    %s" % (
                    raw_line.decode(file_encoding, 'replace'),))
0.48.2 by Parth Malwankar
intermediate checkin. we now show diff with -p option.
301
302
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
303
def versioned_grep(opts):
    """Grep the paths in ``opts.path_list`` within the selected revisions.

    The branch containing '.' is opened and read-locked; each selected
    revision's tree is searched, directories via dir_grep and other paths
    via versioned_file_grep. Paths the revision tree does not have are
    skipped with a warning.

    :param opts: options object; ``revision``, ``levels`` and ``path_list``
        are read here and ``outputter`` is set on it.
    """
    _tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        first_spec = opts.revision[0]
        start_revid = first_spec.as_revision_id(branch)
        if start_revid is None:
            first_spec = RevisionSpec_revno.from_string("revno:1")
            start_revid = first_spec.as_revision_id(branch)
        start_dotted = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) != 2:
            # Optimization: a single specific revision does not need the
            # slow _graph_view_revisions; build its tuple directly.
            # _graph_view_revisions is used only for revision ranges.
            given_revs = [
                (start_revid, '.'.join(map(str, start_dotted)), 0)]
        else:
            second_spec = opts.revision[1]
            end_revid = second_spec.as_revision_id(branch)
            if end_revid is None:
                _last_revno, end_revid = branch.last_revision_info()
            end_dotted = branch.revision_id_to_dotted_revno(end_revid)

            both_mainline = (_rev_on_mainline(start_dotted) and
                             _rev_on_mainline(end_dotted))

            # Ensure that we go in reverse order.
            if start_dotted > end_dotted:
                start_dotted, end_dotted = end_dotted, start_dotted
                start_revid, end_revid = end_revid, start_revid

            # Optimization: traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs, so use
            # _linear_view_revisions for that. If all revs are to be grepped
            # fall back to the slower _graph_view_revisions.
            if opts.levels == 1 and both_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # With level=1 show only top level.
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            rev_tree = rev_spec.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not rev_tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                if not osutils.isdir(path):
                    versioned_file_grep(
                        rev_tree, tree_path, '.', path, opts, revno)
                    continue
                # For a directory the path itself is the display prefix.
                dir_grep(rev_tree, path, relpath, opts, revno, path)
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
367
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
368
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
369
def workingtree_grep(opts):
    """Grep the paths in ``opts.path_list`` within the working tree.

    Directories are searched through dir_grep; any other path is opened
    from disk in binary mode and handed to _file_grep.

    :param opts: options object; ``path_list`` is read here, ``print_revno``
        is set to None and ``outputter`` is set on it.
    :raises errors.BzrCommandError: if no working tree is found at '.'
    """
    # For the working tree there is no revno: set it to None.
    opts.print_revno = None
    revno = None

    tree, _branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        raise errors.BzrCommandError(
            'Cannot search working tree. Working tree not found.\n'
            'To search for specific revision in history use the -r option.')

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if not osutils.isdir(path):
                with open(path, 'rb') as source:
                    _file_grep(source.read(), path, opts, revno)
                continue
            # For a directory the path itself is the display prefix.
            dir_grep(tree, path, relpath, opts, revno, path)
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
390
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
391
0.40.74 by Parth Malwankar
optimization. --include/exclude are checked before reading the file.
392
def _skip_file(include, exclude, path):
393
    if include and not _path_in_glob_list(path, include):
394
        return True
395
    if exclude and _path_in_glob_list(path, exclude):
396
        return True
397
    return False
398
399
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
400
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep the files that ``tree.list_files`` reports for a directory.

    Only entries whose class is 'V' and whose kind is 'file' are searched,
    and only if they pass the ``opts.include`` / ``opts.exclude`` globs.

    :param tree: tree to search. ``list_files`` is always used; with a
        revno, ``get_file_revision`` and ``iter_files_bytes`` are used too;
        without one, files are opened from disk below ``tree.basedir``.
    :param path: directory to search, joined onto ``relpath``.
    :param relpath: joined in front of ``path`` to form the directory handed
        to ``tree.list_files``; also used to build display paths.
    :param opts: options object. Reads ``from_root``, ``recursive``,
        ``include``, ``exclude``, ``files_with_matches``,
        ``files_without_match`` and ``outputter``.
    :param revno: None to grep the files on disk; otherwise the revision
        number passed on to the output code for versioned content.
    :param path_prefix: passed through to the per-file grep functions.
    """
    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching from the root of the tree
        from_dir = None

    # Directory used to locate files on disk. Computed once up front so
    # that from_dir is not modified while the listing built from it is
    # still being iterated.
    disk_dir = '.' if from_dir is None else from_dir
    list_only = opts.files_with_matches or opts.files_without_match

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, _entry in tree.list_files(
            include_root=False, from_dir=from_dir, recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc != 'V' or fkind != 'file':
            continue

        if revno is not None:
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            # If old result is valid, print results immediately.
            # Otherwise, add file info to to_grep so that the
            # loop later will get chunks and grep them
            cache_id = tree.get_file_revision(tree_path, fid)
            # An outputter created without a cache has cache set to None;
            # treat that as a miss instead of failing on the 'in' test.
            if outputter.cache is not None and cache_id in outputter.cache:
                # GZ 2010-06-05: Not really sure caching and re-outputting
                #                the old path is really the right thing,
                #                but it's what the old code seemed to do
                outputter.write_cached_lines(cache_id, revno)
            else:
                to_grep_append((tree_path, (fp, tree_path)))
        else:
            # we are grepping working tree.
            path_for_file = osutils.pathjoin(tree.basedir, disk_dir, fp)
            if list_only:
                # Optimize for wtree list-only as we don't need to read the
                # entire file
                with open(path_for_file, 'rb', buffering=4096) as f:
                    _file_grep_list_only_wtree(f, fp, opts, path_prefix)
            else:
                with open(path_for_file, 'rb') as f:
                    _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (fp, tree_path), chunks in tree.iter_files_bytes(to_grep):
            display_path = _make_display_path(relpath, fp)
            _file_grep(b''.join(chunks), display_path, opts, revno,
                       path_prefix, tree.get_file_revision(tree_path))
0.40.43 by Parth Malwankar
moved cmd_grep._grep_dir to grep.dir_grep
458
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
459
0.41.8 by Parth Malwankar
code cleanup.
460
def _make_display_path(relpath, path):
461
    """Return path string relative to user cwd.
0.40.42 by Parth Malwankar
fix to make grep paths relative to cwd
462
0.41.8 by Parth Malwankar
code cleanup.
463
    Take tree's 'relpath' and user supplied 'path', and return path
464
    that can be displayed to the user.
465
    """
0.40.15 by Parth Malwankar
some fixes and test updates
466
    if relpath:
0.40.52 by Parth Malwankar
code cleanup and documentation
467
        # update path so to display it w.r.t cwd
468
        # handle windows slash separator
0.40.20 by Parth Malwankar
used path functions from bzrlib.osutils
469
        path = osutils.normpath(osutils.pathjoin(relpath, path))
0.40.22 by Parth Malwankar
fixed display path formatting on windows
470
        path = path.replace('\\', '/')
471
        path = path.replace(relpath + '/', '', 1)
0.41.8 by Parth Malwankar
code cleanup.
472
    return path
473
474
6874.2.5 by Jelmer Vernooij
Fix grep.
475
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
    """Grep the text of a single file taken from ``tree``.

    The text is obtained with ``tree.get_file_text(tree_path)`` and handed
    to _file_grep together with a display path built from ``relpath`` and
    ``path``.

    :param tree: tree providing ``get_file_text``.
    :param tree_path: path passed to ``tree.get_file_text``.
    :param relpath: passed with ``path`` to _make_display_path.
    :param path: user supplied path of the file.
    :param opts: options object, passed through to _file_grep.
    :param revno: revision number, passed through to _file_grep.
    :param path_prefix: optional prefix, passed through to _file_grep.
    """
    display_path = _make_display_path(relpath, path)
    _file_grep(tree.get_file_text(tree_path), display_path, opts, revno,
               path_prefix)
0.41.21 by Parth Malwankar
include/exclude working now. tests not added.
482
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
483
0.41.21 by Parth Malwankar
include/exclude working now. tests not added.
484
def _path_in_glob_list(path, glob_list):
485
    for glob in glob_list:
486
        if fnmatch(path, glob):
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
487
            return True
488
    return False
0.41.12 by Parth Malwankar
initial support for working tree grep (no test cases yet!)
489
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
490
0.46.7 by Martin
Move line writing function up the stack so it lasts the whole operation, and clean up some params
491
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
492
    # test and skip binary files
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
493
    if b'\x00' in file.read(1024):
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
494
        if opts.verbose:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
495
            trace.warning("Binary file '%s' skipped.", path)
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
496
        return
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
497
498
    file.seek(0) # search from beginning
499
500
    found = False
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
501
    if opts.fixed_string:
502
        pattern = opts.pattern.encode(_user_encoding, 'replace')
0.46.1 by Martin
Make -Fi use regexps for re.IGNORECASE rather than double str.lower
503
        for line in file:
504
            if pattern in line:
505
                found = True
506
                break
0.40.121 by Parth Malwankar
initial implementation of -L/--files-without-matches. no tests.
507
    else: # not fixed_string
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
508
        for line in file:
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
509
            if opts.patternc.search(line):
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
510
                found = True
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
511
                break
512
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
513
    if (opts.files_with_matches and found) or \
514
        (opts.files_without_match and not found):
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
515
        if path_prefix and path_prefix != '.':
516
            # user has passed a dir arg, show that as result prefix
517
            path = osutils.pathjoin(path_prefix, path)
0.46.18 by Martin
Fix another, previously existing issue with colour and match-only
518
        opts.outputter.get_writer(path, None, None)()
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
519
520
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
521
class _Outputter(object):
522
    """Precalculate formatting based on options given
523
524
    The idea here is to do this work only once per run, and finally return a
525
    function that will do the minimum amount possible for each match.
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
526
    """
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
527
    def __init__(self, opts, use_cache=False):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
528
        self.outf = opts.outf
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
529
        if use_cache:
530
            # self.cache is used to cache results for dir grep based on fid.
531
            # If the fid is does not change between results, it means that
532
            # the result will be the same apart from revno. In such a case
533
            # we avoid getting file chunks from repo and grepping. The result
534
            # is just printed by replacing old revno with new one.
535
            self.cache = {}
536
        else:
537
            self.cache = None
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
538
        no_line = opts.files_with_matches or opts.files_without_match
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
539
540
        if opts.show_color:
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
541
            if no_line:
542
                self.get_writer = self._get_writer_plain
543
            elif opts.fixed_string:
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
544
                self._old = opts.pattern
545
                self._new = color_string(opts.pattern, FG.BOLD_RED)
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
546
                self.get_writer = self._get_writer_fixed_highlighted
547
            else:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
548
                flags = opts.patternc.flags
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
549
                self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
550
                self._highlight = color_string("\\1", FG.BOLD_RED)
551
                self.get_writer = self._get_writer_regexp_highlighted
552
            path_start = FG.MAGENTA
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
553
            path_end = FG.NONE
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
554
            sep = color_string(':', FG.BOLD_CYAN)
555
            rev_sep = color_string('~', FG.BOLD_YELLOW)
556
        else:
557
            self.get_writer = self._get_writer_plain
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
558
            path_start = path_end = ""
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
559
            sep = ":"
560
            rev_sep = "~"
561
562
        parts = [path_start, "%(path)s"]
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
563
        if opts.print_revno:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
564
            parts.extend([rev_sep, "%(revno)s"])
0.46.13 by Martin
Split format string into two parts for non-cached operations too
565
        self._format_initial = "".join(parts)
566
        parts = []
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
567
        if no_line:
568
            if not opts.print_revno:
569
                parts.append(path_end)
570
        else:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
571
            if opts.line_number:
0.46.13 by Martin
Split format string into two parts for non-cached operations too
572
                parts.extend([sep, "%(lineno)s"])
573
            parts.extend([sep, "%(line)s"])
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
574
        parts.append(opts.eol_marker)
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
575
        self._format_perline = "".join(parts)
0.46.7 by Martin
Move line writing function up the stack so it lasts the whole operation, and clean up some params
576
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
577
    def _get_writer_plain(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
578
        """Get function for writing uncoloured output"""
0.46.13 by Martin
Split format string into two parts for non-cached operations too
579
        per_line = self._format_perline
580
        start = self._format_initial % {"path":path, "revno":revno}
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
581
        write = self.outf.write
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
582
        if self.cache is not None and cache_id is not None:
583
            result_list = []
584
            self.cache[cache_id] = path, result_list
585
            add_to_cache = result_list.append
586
            def _line_cache_and_writer(**kwargs):
587
                """Write formatted line and cache arguments"""
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
588
                end = per_line % kwargs
589
                add_to_cache(end)
590
                write(start + end)
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
591
            return _line_cache_and_writer
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
592
        def _line_writer(**kwargs):
593
            """Write formatted line from arguments given by underlying opts"""
0.46.13 by Martin
Split format string into two parts for non-cached operations too
594
            write(start + per_line % kwargs)
0.46.8 by Martin
Move pattern highlighting out of _file_grep and into the line writing code
595
        return _line_writer
596
0.46.11 by Martin
Add method to outputter for writing cached lines
597
    def write_cached_lines(self, cache_id, revno):
598
        """Write cached results out again for new revision"""
599
        cached_path, cached_matches = self.cache[cache_id]
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
600
        start = self._format_initial % {"path":cached_path, "revno":revno}
0.46.11 by Martin
Add method to outputter for writing cached lines
601
        write = self.outf.write
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
602
        for end in cached_matches:
603
            write(start + end)
0.46.11 by Martin
Add method to outputter for writing cached lines
604
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
605
    def _get_writer_regexp_highlighted(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
606
        """Get function for writing output with regexp match highlighted"""
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
607
        _line_writer = self._get_writer_plain(path, revno, cache_id)
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
608
        sub, highlight = self._sub, self._highlight
0.46.8 by Martin
Move pattern highlighting out of _file_grep and into the line writing code
609
        def _line_writer_regexp_highlighted(line, **kwargs):
610
            """Write formatted line with matched pattern highlighted"""
611
            return _line_writer(line=sub(highlight, line), **kwargs)
612
        return _line_writer_regexp_highlighted
613
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
614
    def _get_writer_fixed_highlighted(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
615
        """Get function for writing output with search string highlighted"""
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
616
        _line_writer = self._get_writer_plain(path, revno, cache_id)
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
617
        old, new = self._old, self._new
618
        def _line_writer_fixed_highlighted(line, **kwargs):
619
            """Write formatted line with string searched for highlighted"""
620
            return _line_writer(line=line.replace(old, new), **kwargs)
621
        return _line_writer_fixed_highlighted
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
622
623
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
624
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
0.41.9 by Parth Malwankar
refactored code towards support for working tree grep.
625
    # test and skip binary files
6977.2.1 by Jelmer Vernooij
Require that get_file implementations are contect managers, simplify file handling in transform.
626
    if b'\x00' in file_text[:1024]:
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
627
        if opts.verbose:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
628
            trace.warning("Binary file '%s' skipped.", path)
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
629
        return
0.41.9 by Parth Malwankar
refactored code towards support for working tree grep.
630
0.40.52 by Parth Malwankar
code cleanup and documentation
631
    if path_prefix and path_prefix != '.':
632
        # user has passed a dir arg, show that as result prefix
633
        path = osutils.pathjoin(path_prefix, path)
634
0.46.21 by Martin
Fix and test bytes/unicode issue but there's more to do in this area
635
    # GZ 2010-06-07: There's no actual guarentee the file contents will be in
636
    #                the user encoding, but we have to guess something and it
637
    #                is a reasonable default without a better mechanism.
638
    file_encoding = _user_encoding
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
639
    pattern = opts.pattern.encode(_user_encoding, 'replace')
0.43.8 by Parth Malwankar
added color for regex pattern.
640
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
641
    writeline = opts.outputter.get_writer(path, revno, cache_id)
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
642
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
643
    if opts.files_with_matches or opts.files_without_match:
0.46.2 by Martin
Remove redundant code on files_with_matches path in _file_grep
644
        if opts.fixed_string:
6691.1.5 by Jelmer Vernooij
Drop support for Python <= 2.5.
645
            found = pattern in file_text
0.40.112 by Parth Malwankar
support for -l, --files-with-matches. no tests yet.
646
        else:
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
647
            search = opts.patternc.search
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
648
            if b"$" not in pattern:
0.47.2 by Martin
Use whole text search for match only cases where possible as well
649
                found = search(file_text) is not None
650
            else:
651
                for line in file_text.splitlines():
652
                    if search(line):
653
                        found = True
654
                        break
655
                else:
656
                    found = False
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
657
        if (opts.files_with_matches and found) or \
658
                (opts.files_without_match and not found):
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
659
            writeline()
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
660
    elif opts.fixed_string:
0.47.1 by Martin
Implement whole text search for fast failure on no match
661
        # Fast path for no match, search through the entire file at once rather
6619.3.25 by Jelmer Vernooij
Drop some old dependency checks.
662
        # than a line at a time. <http://effbot.org/zone/stringlib.htm>
663
        i = file_text.find(pattern)
664
        if i == -1:
665
            return
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
666
        b = file_text.rfind(b"\n", 0, i) + 1
6619.3.25 by Jelmer Vernooij
Drop some old dependency checks.
667
        if opts.line_number:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
668
            start = file_text.count(b"\n", 0, b) + 1
6619.3.25 by Jelmer Vernooij
Drop some old dependency checks.
669
        file_text = file_text[b:]
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
670
        if opts.line_number:
0.46.5 by Martin
Delete now redundant duplicated loops in _file_grep
671
            for index, line in enumerate(file_text.splitlines()):
672
                if pattern in line:
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
673
                    line = line.decode(file_encoding, 'replace')
0.47.1 by Martin
Implement whole text search for fast failure on no match
674
                    writeline(lineno=index+start, line=line)
0.46.5 by Martin
Delete now redundant duplicated loops in _file_grep
675
        else:
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
676
            for line in file_text.splitlines():
677
                if pattern in line:
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
678
                    line = line.decode(file_encoding, 'replace')
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
679
                    writeline(line=line)
0.40.63 by Parth Malwankar
performance: moved conditionals out of core loop.
680
    else:
0.47.1 by Martin
Implement whole text search for fast failure on no match
681
        # Fast path on no match, the re module avoids bad behaviour in most
682
        # standard cases, but perhaps could try and detect backtracking
683
        # patterns here and avoid whole text search in those cases
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
684
        search = opts.patternc.search
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
685
        if b"$" not in pattern:
0.47.1 by Martin
Implement whole text search for fast failure on no match
686
            # GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
687
            #                through revisions as bazaar returns binary mode
688
            #                and trailing \r breaks $ as line ending match
689
            m = search(file_text)
690
            if m is None:
691
                return
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
692
            b = file_text.rfind(b"\n", 0, m.start()) + 1
0.47.1 by Martin
Implement whole text search for fast failure on no match
693
            if opts.line_number:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
694
                start = file_text.count(b"\n", 0, b) + 1
0.47.4 by Martin
Scale back no-match fast path to avoid some behaviour changes with line endings
695
            file_text = file_text[b:]
0.47.3 by Martin
Fix previously untested bug with regexp and line numbers introduced by optimisation
696
        else:
697
            start = 1
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
698
        if opts.line_number:
699
            for index, line in enumerate(file_text.splitlines()):
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
700
                if search(line):
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
701
                    line = line.decode(file_encoding, 'replace')
0.47.3 by Martin
Fix previously untested bug with regexp and line numbers introduced by optimisation
702
                    writeline(lineno=index+start, line=line)
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
703
        else:
704
            for line in file_text.splitlines():
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
705
                if search(line):
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
706
                    line = line.decode(file_encoding, 'replace')
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
707
                    writeline(line=line)
0.40.139 by Parth Malwankar
(Martin [gz]) Added fast path for no match that avoids splitting the
708