/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.40.10 by Parth Malwankar
assigned copyright to canonical
1
# Copyright (C) 2010 Canonical Ltd
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.40.147 by Jelmer Vernooij
Fix compatibility with newer versions of bzr: don't use relative imports in lazy imports, and import features from bzrlib.tests.features.
17
from __future__ import absolute_import
18
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
19
import re
0.47.1 by Martin
Implement whole text search for fast failure on no match
20
import sys
21
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
22
from ...lazy_import import lazy_import
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
23
lazy_import(globals(), """
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
24
from fnmatch import fnmatch
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
25
6667.2.1 by Jelmer Vernooij
Some cleanup; s/BzrDir/ControlDir/, remove some unused imports.
26
from breezy._termcolor import color_string, FG
0.43.4 by Parth Malwankar
initial support for color for fixed string grep.
27
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
28
from breezy import (
6667.2.1 by Jelmer Vernooij
Some cleanup; s/BzrDir/ControlDir/, remove some unused imports.
29
    controldir,
0.48.5 by Parth Malwankar
fixed imports
30
    diff,
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
31
    errors,
32
    lazy_regex,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
33
    revision as _mod_revision,
34
    )
35
""")
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
36
from breezy import (
0.40.47 by Parth Malwankar
fixes bug #531336. binary files are now skipped.
37
    osutils,
38
    trace,
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
39
    )
6800.1.5 by Jelmer Vernooij
Fix more imports.
40
from breezy.revisionspec import (
41
    RevisionSpec,
42
    RevisionSpec_revid,
43
    RevisionSpec_revno,
44
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
45
from breezy.sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
46
    BytesIO,
47
    )
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
48
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
49
# Encoding reported by osutils for the current user.  Within this module it
# is used to encode the search pattern to bytes and to decode diff output
# before it is written.
_user_encoding = osutils.get_user_encoding()
50
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
51
0.40.95 by Parth Malwankar
faster mainline rev grep
52
class _RevisionNotLinear(Exception):
53
    """Raised when a revision is not on left-hand history."""
54
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
55
0.40.95 by Parth Malwankar
faster mainline rev grep
56
def _rev_on_mainline(rev_tuple):
57
    """returns True is rev tuple is on mainline"""
58
    if len(rev_tuple) == 1:
59
        return True
60
    return rev_tuple[1] == 0 and rev_tuple[2] == 0
61
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
62
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
63
# NOTE: _linear_view_revisions is based on
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
64
# breezy.log._linear_view_revisions.
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
65
# This should probably be a common public API
0.40.95 by Parth Malwankar
faster mainline rev grep
66
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield left-hand ancestry revisions from end_rev_id back to start_rev_id.

    Requires that start is older than end.

    :param branch: the branch; its repository graph is walked and it is
      asked for the dotted revno of every revision visited
    :param start_rev_id: the lower revision-id (inclusive); iteration stops
      once it has been yielded
    :param end_rev_id: the upper revision-id, where iteration begins
    :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
      merge_depth is always 0.
    """
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        yield revision_id, revno_str, 0
        if revision_id == start_rev_id:
            # The start revision is inclusive: it has just been yielded,
            # so there is nothing older left to report.
            break
78
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
79
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
80
# NOTE: _graph_view_revisions is copied from
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
81
# breezy.log._graph_view_revisions.
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
82
# This should probably be a common public API
83
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
84
                          rebase_initial_depths=True):
85
    """Calculate revisions to view including merges, newest to oldest.
86
87
    :param branch: the branch
88
    :param start_rev_id: the lower revision-id
89
    :param end_rev_id: the upper revision-id
90
    :param rebase_initial_depth: should depths be rebased until a mainline
91
      revision is found?
92
    :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
93
    """
0.40.106 by Parth Malwankar
fixed error in dotted rev reverse search.
94
    # requires that start is older than end
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
95
    view_revisions = branch.iter_merge_sorted_revisions(
96
        start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
97
        stop_rule="with-merges")
98
    if not rebase_initial_depths:
99
        for (rev_id, merge_depth, revno, end_of_merge
100
             ) in view_revisions:
101
            yield rev_id, '.'.join(map(str, revno)), merge_depth
102
    else:
103
        # We're following a development line starting at a merged revision.
104
        # We need to adjust depths down by the initial depth until we find
105
        # a depth less than it. Then we use that depth as the adjustment.
106
        # If and when we reach the mainline, depth adjustment ends.
107
        depth_adjustment = None
108
        for (rev_id, merge_depth, revno, end_of_merge
109
             ) in view_revisions:
110
            if depth_adjustment is None:
111
                depth_adjustment = merge_depth
112
            if depth_adjustment:
113
                if merge_depth < depth_adjustment:
114
                    # From now on we reduce the depth adjustement, this can be
115
                    # surprising for users. The alternative requires two passes
116
                    # which breaks the fast display of the first revision
117
                    # though.
118
                    depth_adjustment = merge_depth
119
                merge_depth -= depth_adjustment
120
            yield rev_id, '.'.join(map(str, revno)), merge_depth
121
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
122
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
123
def compile_pattern(pattern, flags=0):
    """Compile a search pattern, reporting bad patterns as a BzrError.

    :param pattern: the regular expression source
    :param flags: flags passed through to ``re.compile``
    :return: the compiled pattern object
    :raises errors.BzrError: if ``re.compile`` rejects the pattern
    """
    try:
        # use python's re.compile as we need to catch re.error in case of
        # bad pattern
        lazy_regex.reset_compile()
        return re.compile(pattern, flags)
    except re.error:
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
132
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
133
0.40.86 by Parth Malwankar
the check for implicit fixed_string now allows for spaces.
134
def is_fixed_string(s):
    """Return True if ``s`` contains only word characters and whitespace.

    Such a string has no regular expression metacharacters in it.

    :param s: the candidate search string
    :return: True if every character is an ASCII letter, digit, underscore
      or whitespace (the empty string qualifies), otherwise False.
    """
    return re.match(r"^([A-Za-z0-9_]|\s)*$", s) is not None
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
138
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
139
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
140
class _GrepDiffOutputter(object):
141
    """Precalculate formatting based on options given for diff grep.
142
    """
6531.3.8 by Jelmer Vernooij
Move color feature into bzrlib.tests.features.
143
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
144
    def __init__(self, opts):
0.48.8 by Parth Malwankar
colored header for diff grep output
145
        self.opts = opts
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
146
        self.outf = opts.outf
147
        if opts.show_color:
148
            if opts.fixed_string:
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
149
                self._old = opts.pattern
150
                self._new = color_string(opts.pattern, FG.BOLD_RED)
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
151
                self.get_writer = self._get_writer_fixed_highlighted
152
            else:
153
                flags = opts.patternc.flags
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
154
                self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
155
                self._highlight = color_string("\\1", FG.BOLD_RED)
156
                self.get_writer = self._get_writer_regexp_highlighted
157
        else:
158
            self.get_writer = self._get_writer_plain
159
0.48.8 by Parth Malwankar
colored header for diff grep output
160
    def get_file_header_writer(self):
161
        """Get function for writing file headers"""
162
        write = self.outf.write
163
        eol_marker = self.opts.eol_marker
164
        def _line_writer(line):
165
            write(line + eol_marker)
166
        def _line_writer_color(line):
167
            write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
168
        if self.opts.show_color:
169
            return _line_writer_color
170
        else:
171
            return _line_writer
172
        return _line_writer
173
174
    def get_revision_header_writer(self):
175
        """Get function for writing revno lines"""
176
        write = self.outf.write
177
        eol_marker = self.opts.eol_marker
178
        def _line_writer(line):
179
            write(line + eol_marker)
180
        def _line_writer_color(line):
181
            write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
182
        if self.opts.show_color:
183
            return _line_writer_color
184
        else:
185
            return _line_writer
186
        return _line_writer
187
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
188
    def _get_writer_plain(self):
189
        """Get function for writing uncoloured output"""
190
        write = self.outf.write
0.48.8 by Parth Malwankar
colored header for diff grep output
191
        eol_marker = self.opts.eol_marker
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
192
        def _line_writer(line):
0.48.8 by Parth Malwankar
colored header for diff grep output
193
            write(line + eol_marker)
0.48.7 by Parth Malwankar
initial outputter support for diff_grep
194
        return _line_writer
195
196
    def _get_writer_regexp_highlighted(self):
197
        """Get function for writing output with regexp match highlighted"""
198
        _line_writer = self._get_writer_plain()
199
        sub, highlight = self._sub, self._highlight
200
        def _line_writer_regexp_highlighted(line):
201
            """Write formatted line with matched pattern highlighted"""
202
            return _line_writer(line=sub(highlight, line))
203
        return _line_writer_regexp_highlighted
204
205
    def _get_writer_fixed_highlighted(self):
206
        """Get function for writing output with search string highlighted"""
207
        _line_writer = self._get_writer_plain()
208
        old, new = self._old, self._new
209
        def _line_writer_fixed_highlighted(line):
210
            """Write formatted line with string searched for highlighted"""
211
            return _line_writer(line=line.replace(old, new))
212
        return _line_writer_fixed_highlighted
213
214
0.48.2 by Parth Malwankar
intermediate checkin. we now show diff with -p option.
215
def grep_diff(opts):
    """Grep the diff of each selected revision for ``opts.pattern``.

    Opens the tree or branch containing the current directory and works out
    which revisions to inspect from ``opts.revision``.  Each revision is
    diffed against its first parent (or the null revision when it has no
    parents).  Diff lines starting with ``+`` or ``-`` that contain the
    pattern are written through a ``_GrepDiffOutputter``, preceded once per
    revision by a revno header and once per file by that file's header.

    :param opts: the grep options object.  This function reads ``revision``,
      ``levels`` and ``pattern`` (plus whatever ``_GrepDiffOutputter``
      reads).  When ``opts.revision`` is empty it is overwritten with the
      range ``revno:1`` .. ``last:1``.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        if not opts.revision:
            # if no revision is specified for diff grep we grep all
            # changesets.
            opts.revision = [RevisionSpec.from_string('revno:1'),
                             RevisionSpec.from_string('last:1')]
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid == b'null:':
            return
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                             _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # we don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]
        repo = branch.repository
        # The diff is produced as bytes, so both patterns are bytes too.
        diff_pattern = re.compile(
            b"^[+\\-].*(" + opts.pattern.encode(_user_encoding) + b")")
        file_pattern = re.compile(b"=== (modified|added|removed) file '.*'")
        outputter = _GrepDiffOutputter(opts)
        writeline = outputter.get_writer()
        writerevno = outputter.get_revision_header_writer()
        writefileheader = outputter.get_file_header_writer()
        file_encoding = _user_encoding
        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev_spec = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            new_rev = repo.get_revision(revid)
            new_tree = rev_spec.as_tree(branch)
            if new_rev.parent_ids:
                ancestor_id = new_rev.parent_ids[0]
            else:
                ancestor_id = _mod_revision.NULL_REVISION
            old_tree = repo.revision_tree(ancestor_id)
            s = BytesIO()
            diff.show_diff_trees(old_tree, new_tree, s,
                                 old_label='', new_label='')
            # Headers are emitted lazily: the revno header before the first
            # matching line of the revision, the file header before the
            # first matching line of each file.
            display_revno = True
            display_file = False
            file_header = None
            text = s.getvalue()
            for line in text.splitlines():
                if file_pattern.search(line):
                    file_header = line
                    display_file = True
                elif diff_pattern.search(line):
                    if display_revno:
                        writerevno("=== revno:%s ===" % (revno,))
                        display_revno = False
                    if display_file:
                        writefileheader("  %s" % (
                            file_header.decode(file_encoding, 'replace'),))
                        display_file = False
                    line = line.decode(file_encoding, 'replace')
                    writeline("    %s" % (line,))
0.48.2 by Parth Malwankar
intermediate checkin. we now show diff with -p option.
302
303
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
304
def versioned_grep(opts):
    """Grep the versioned contents of one revision or a revision range.

    Opens the tree or branch containing the current directory, works out
    which revisions to inspect from ``opts.revision``, and for each of them
    greps every path in ``opts.path_list`` as it exists in that revision's
    tree.  Paths missing from a revision tree are skipped with a warning.

    :param opts: the grep options object.  This function reads ``revision``,
      ``levels`` and ``path_list``, and sets ``opts.outputter`` to a caching
      ``_Outputter`` used by the per-file grep helpers.
    """
    wt, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    with branch.lock_read():
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                             _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # we don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string(
                "revid:" + revid.decode('utf-8'))
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                tree_path = osutils.pathjoin(relpath, path)
                if not tree.has_filename(tree_path):
                    trace.warning("Skipped unknown file '%s'.", path)
                    continue

                # NOTE(review): this checks the path on disk rather than in
                # the revision tree - confirm that is intended.
                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(
                        tree, tree_path, '.', path, opts, revno)
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
368
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
369
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
370
def workingtree_grep(opts):
    """Grep the files of the working tree containing the current directory.

    Directories in ``opts.path_list`` are handed to ``dir_grep``; any other
    path is opened from disk in binary mode and its full contents passed to
    ``_file_grep``.

    :param opts: the grep options object.  This function reads
      ``path_list``, sets ``opts.print_revno`` to None and sets
      ``opts.outputter`` to a new ``_Outputter``.
    :raises errors.BzrCommandError: if no working tree is found.
    """
    revno = opts.print_revno = None  # for working tree set revno to None

    tree, branch, relpath = \
        controldir.ControlDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
               'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    with tree.lock_read():
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                with open(path, 'rb') as f:
                    _file_grep(f.read(), path, opts, revno)
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
391
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
392
0.40.74 by Parth Malwankar
optimization. --include/exclude are checked before reading the file.
393
def _skip_file(include, exclude, path):
394
    if include and not _path_in_glob_list(path, include):
395
        return True
396
    if exclude and _path_in_glob_list(path, exclude):
397
        return True
398
    return False
399
400
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
401
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep the versioned files listed under a directory of ``tree``.

    :param tree: tree to search. ``list_files`` is always used; the
        working tree case also reads ``basedir``, the revision case uses
        ``get_file_revision`` and ``iter_files_bytes``.
    :param path: directory joined onto ``relpath`` to obtain the directory
        the listing starts from.
    :param relpath: joined with ``path`` for the listing and used to build
        the displayed path of versioned files.
    :param opts: options object; ``from_root``, ``recursive``, ``include``,
        ``exclude``, ``outputter``, ``files_with_matches`` and
        ``files_without_match`` are read here.
    :param revno: None to read file contents from disk (working tree),
        otherwise the value shown for matches whose contents are fetched
        through ``tree.iter_files_bytes``.
    :param path_prefix: passed through to the per-file grep helpers.
    """
    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching from the root of the tree
        # NOTE(review): a local ``recursive = True`` used to be set here
        # but was never read; the listing below honours opts.recursive.
        # Confirm whether from_root is meant to force recursion.
        from_dir = None

    to_grep = []
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, entry in tree.list_files(
            include_root=False, from_dir=from_dir, recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        # only versioned regular files are searched
        if fc != 'V' or fkind != 'file':
            continue

        if revno is not None:
            tree_path = osutils.pathjoin(from_dir if from_dir else '', fp)
            # If old result is valid, print results immediately.
            # Otherwise, add file info to to_grep so that the
            # loop later will get chunks and grep them
            cache_id = tree.get_file_revision(tree_path, fid)
            cache = outputter.cache
            # The outputter only has a cache when it was created with
            # use_cache=True; without one every file is grepped afresh.
            if cache is not None and cache_id in cache:
                # GZ 2010-06-05: Not really sure caching and re-outputting
                #                the old path is really the right thing,
                #                but it's what the old code seemed to do
                outputter.write_cached_lines(cache_id, revno)
            else:
                to_grep.append((tree_path, (fp, tree_path)))
            continue

        # we are grepping working tree.
        if from_dir is None:
            from_dir = '.'

        path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
        if opts.files_with_matches or opts.files_without_match:
            # Optimize for wtree list-only as we don't need to read the
            # entire file
            with open(path_for_file, 'rb', buffering=4096) as f:
                _file_grep_list_only_wtree(f, fp, opts, path_prefix)
        else:
            with open(path_for_file, 'rb') as f:
                _file_grep(f.read(), fp, opts, revno, path_prefix)

    if revno is not None:  # grep versioned files
        for (listed_path, tree_path), chunks in tree.iter_files_bytes(to_grep):
            display_path = _make_display_path(relpath, listed_path)
            _file_grep(b''.join(chunks), display_path, opts, revno,
                       path_prefix, tree.get_file_revision(tree_path))
0.40.43 by Parth Malwankar
moved cmd_grep._grep_dir to grep.dir_grep
459
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
460
0.41.8 by Parth Malwankar
code cleanup.
461
def _make_display_path(relpath, path):
462
    """Return path string relative to user cwd.
0.40.42 by Parth Malwankar
fix to make grep paths relative to cwd
463
0.41.8 by Parth Malwankar
code cleanup.
464
    Take tree's 'relpath' and user supplied 'path', and return path
465
    that can be displayed to the user.
466
    """
0.40.15 by Parth Malwankar
some fixes and test updates
467
    if relpath:
0.40.52 by Parth Malwankar
code cleanup and documentation
468
        # update path so to display it w.r.t cwd
469
        # handle windows slash separator
0.40.20 by Parth Malwankar
used path functions from bzrlib.osutils
470
        path = osutils.normpath(osutils.pathjoin(relpath, path))
0.40.22 by Parth Malwankar
fixed display path formatting on windows
471
        path = path.replace('\\', '/')
472
        path = path.replace(relpath + '/', '', 1)
0.41.8 by Parth Malwankar
code cleanup.
473
    return path
474
475
6874.2.5 by Jelmer Vernooij
Fix grep.
476
def versioned_file_grep(tree, tree_path, relpath, path, opts, revno, path_prefix=None):
    """Grep the text of a single file taken from ``tree``.

    The path shown to the user is derived from ``relpath`` and ``path``,
    the contents are fetched with ``tree.get_file_text(tree_path)`` and
    both are handed to _file_grep together with the remaining arguments.
    """
    shown_path = _make_display_path(relpath, path)
    contents = tree.get_file_text(tree_path)
    _file_grep(contents, shown_path, opts, revno, path_prefix)
0.41.21 by Parth Malwankar
include/exclude working now. tests not added.
483
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
484
0.41.21 by Parth Malwankar
include/exclude working now. tests not added.
485
def _path_in_glob_list(path, glob_list):
486
    for glob in glob_list:
487
        if fnmatch(path, glob):
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
488
            return True
489
    return False
0.41.12 by Parth Malwankar
initial support for working tree grep (no test cases yet!)
490
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
491
0.46.7 by Martin
Move line writing function up the stack so it lasts the whole operation, and clean up some params
492
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
493
    # test and skip binary files
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
494
    if b'\x00' in file.read(1024):
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
495
        if opts.verbose:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
496
            trace.warning("Binary file '%s' skipped.", path)
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
497
        return
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
498
499
    file.seek(0) # search from beginning
500
501
    found = False
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
502
    if opts.fixed_string:
503
        pattern = opts.pattern.encode(_user_encoding, 'replace')
0.46.1 by Martin
Make -Fi use regexps for re.IGNORECASE rather than double str.lower
504
        for line in file:
505
            if pattern in line:
506
                found = True
507
                break
0.40.121 by Parth Malwankar
initial implementation of -L/--files-without-matches. no tests.
508
    else: # not fixed_string
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
509
        for line in file:
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
510
            if opts.patternc.search(line):
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
511
                found = True
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
512
                break
513
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
514
    if (opts.files_with_matches and found) or \
515
        (opts.files_without_match and not found):
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
516
        if path_prefix and path_prefix != '.':
517
            # user has passed a dir arg, show that as result prefix
518
            path = osutils.pathjoin(path_prefix, path)
0.46.18 by Martin
Fix another, previously existing issue with colour and match-only
519
        opts.outputter.get_writer(path, None, None)()
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
520
521
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
522
class _Outputter(object):
523
    """Precalculate formatting based on options given
524
525
    The idea here is to do this work only once per run, and finally return a
526
    function that will do the minimum amount possible for each match.
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
527
    """
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
528
    def __init__(self, opts, use_cache=False):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
529
        self.outf = opts.outf
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
530
        if use_cache:
531
            # self.cache is used to cache results for dir grep based on fid.
532
            # If the fid is does not change between results, it means that
533
            # the result will be the same apart from revno. In such a case
534
            # we avoid getting file chunks from repo and grepping. The result
535
            # is just printed by replacing old revno with new one.
536
            self.cache = {}
537
        else:
538
            self.cache = None
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
539
        no_line = opts.files_with_matches or opts.files_without_match
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
540
541
        if opts.show_color:
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
542
            if no_line:
543
                self.get_writer = self._get_writer_plain
544
            elif opts.fixed_string:
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
545
                self._old = opts.pattern
546
                self._new = color_string(opts.pattern, FG.BOLD_RED)
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
547
                self.get_writer = self._get_writer_fixed_highlighted
548
            else:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
549
                flags = opts.patternc.flags
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
550
                self._sub = re.compile(opts.pattern.join(("((?:", ")+)")), flags).sub
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
551
                self._highlight = color_string("\\1", FG.BOLD_RED)
552
                self.get_writer = self._get_writer_regexp_highlighted
553
            path_start = FG.MAGENTA
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
554
            path_end = FG.NONE
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
555
            sep = color_string(':', FG.BOLD_CYAN)
556
            rev_sep = color_string('~', FG.BOLD_YELLOW)
557
        else:
558
            self.get_writer = self._get_writer_plain
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
559
            path_start = path_end = ""
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
560
            sep = ":"
561
            rev_sep = "~"
562
563
        parts = [path_start, "%(path)s"]
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
564
        if opts.print_revno:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
565
            parts.extend([rev_sep, "%(revno)s"])
0.46.13 by Martin
Split format string into two parts for non-cached operations too
566
        self._format_initial = "".join(parts)
567
        parts = []
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
568
        if no_line:
569
            if not opts.print_revno:
570
                parts.append(path_end)
571
        else:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
572
            if opts.line_number:
0.46.13 by Martin
Split format string into two parts for non-cached operations too
573
                parts.extend([sep, "%(lineno)s"])
574
            parts.extend([sep, "%(line)s"])
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
575
        parts.append(opts.eol_marker)
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
576
        self._format_perline = "".join(parts)
0.46.7 by Martin
Move line writing function up the stack so it lasts the whole operation, and clean up some params
577
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
578
    def _get_writer_plain(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
579
        """Get function for writing uncoloured output"""
0.46.13 by Martin
Split format string into two parts for non-cached operations too
580
        per_line = self._format_perline
581
        start = self._format_initial % {"path":path, "revno":revno}
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
582
        write = self.outf.write
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
583
        if self.cache is not None and cache_id is not None:
584
            result_list = []
585
            self.cache[cache_id] = path, result_list
586
            add_to_cache = result_list.append
587
            def _line_cache_and_writer(**kwargs):
588
                """Write formatted line and cache arguments"""
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
589
                end = per_line % kwargs
590
                add_to_cache(end)
591
                write(start + end)
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
592
            return _line_cache_and_writer
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
593
        def _line_writer(**kwargs):
594
            """Write formatted line from arguments given by underlying opts"""
0.46.13 by Martin
Split format string into two parts for non-cached operations too
595
            write(start + per_line % kwargs)
0.46.8 by Martin
Move pattern highlighting out of _file_grep and into the line writing code
596
        return _line_writer
597
0.46.11 by Martin
Add method to outputter for writing cached lines
598
    def write_cached_lines(self, cache_id, revno):
599
        """Write cached results out again for new revision"""
600
        cached_path, cached_matches = self.cache[cache_id]
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
601
        start = self._format_initial % {"path":cached_path, "revno":revno}
0.46.11 by Martin
Add method to outputter for writing cached lines
602
        write = self.outf.write
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
603
        for end in cached_matches:
604
            write(start + end)
0.46.11 by Martin
Add method to outputter for writing cached lines
605
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
606
    def _get_writer_regexp_highlighted(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
607
        """Get function for writing output with regexp match highlighted"""
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
608
        _line_writer = self._get_writer_plain(path, revno, cache_id)
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
609
        sub, highlight = self._sub, self._highlight
0.46.8 by Martin
Move pattern highlighting out of _file_grep and into the line writing code
610
        def _line_writer_regexp_highlighted(line, **kwargs):
611
            """Write formatted line with matched pattern highlighted"""
612
            return _line_writer(line=sub(highlight, line), **kwargs)
613
        return _line_writer_regexp_highlighted
614
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
615
    def _get_writer_fixed_highlighted(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
616
        """Get function for writing output with search string highlighted"""
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
617
        _line_writer = self._get_writer_plain(path, revno, cache_id)
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
618
        old, new = self._old, self._new
619
        def _line_writer_fixed_highlighted(line, **kwargs):
620
            """Write formatted line with string searched for highlighted"""
621
            return _line_writer(line=line.replace(old, new), **kwargs)
622
        return _line_writer_fixed_highlighted
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
623
624
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
625
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
0.41.9 by Parth Malwankar
refactored code towards support for working tree grep.
626
    # test and skip binary files
6977.2.1 by Jelmer Vernooij
Require that get_file implementations are contect managers, simplify file handling in transform.
627
    if b'\x00' in file_text[:1024]:
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
628
        if opts.verbose:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
629
            trace.warning("Binary file '%s' skipped.", path)
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
630
        return
0.41.9 by Parth Malwankar
refactored code towards support for working tree grep.
631
0.40.52 by Parth Malwankar
code cleanup and documentation
632
    if path_prefix and path_prefix != '.':
633
        # user has passed a dir arg, show that as result prefix
634
        path = osutils.pathjoin(path_prefix, path)
635
0.46.21 by Martin
Fix and test bytes/unicode issue but there's more to do in this area
636
    # GZ 2010-06-07: There's no actual guarentee the file contents will be in
637
    #                the user encoding, but we have to guess something and it
638
    #                is a reasonable default without a better mechanism.
639
    file_encoding = _user_encoding
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
640
    pattern = opts.pattern.encode(_user_encoding, 'replace')
0.43.8 by Parth Malwankar
added color for regex pattern.
641
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
642
    writeline = opts.outputter.get_writer(path, revno, cache_id)
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
643
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
644
    if opts.files_with_matches or opts.files_without_match:
0.46.2 by Martin
Remove redundant code on files_with_matches path in _file_grep
645
        if opts.fixed_string:
6691.1.5 by Jelmer Vernooij
Drop support for Python <= 2.5.
646
            found = pattern in file_text
0.40.112 by Parth Malwankar
support for -l, --files-with-matches. no tests yet.
647
        else:
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
648
            search = opts.patternc.search
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
649
            if b"$" not in pattern:
0.47.2 by Martin
Use whole text search for match only cases where possible as well
650
                found = search(file_text) is not None
651
            else:
652
                for line in file_text.splitlines():
653
                    if search(line):
654
                        found = True
655
                        break
656
                else:
657
                    found = False
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
658
        if (opts.files_with_matches and found) or \
659
                (opts.files_without_match and not found):
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
660
            writeline()
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
661
    elif opts.fixed_string:
0.47.1 by Martin
Implement whole text search for fast failure on no match
662
        # Fast path for no match, search through the entire file at once rather
6619.3.25 by Jelmer Vernooij
Drop some old dependency checks.
663
        # than a line at a time. <http://effbot.org/zone/stringlib.htm>
664
        i = file_text.find(pattern)
665
        if i == -1:
666
            return
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
667
        b = file_text.rfind(b"\n", 0, i) + 1
6619.3.25 by Jelmer Vernooij
Drop some old dependency checks.
668
        if opts.line_number:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
669
            start = file_text.count(b"\n", 0, b) + 1
6619.3.25 by Jelmer Vernooij
Drop some old dependency checks.
670
        file_text = file_text[b:]
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
671
        if opts.line_number:
0.46.5 by Martin
Delete now redundant duplicated loops in _file_grep
672
            for index, line in enumerate(file_text.splitlines()):
673
                if pattern in line:
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
674
                    line = line.decode(file_encoding, 'replace')
0.47.1 by Martin
Implement whole text search for fast failure on no match
675
                    writeline(lineno=index+start, line=line)
0.46.5 by Martin
Delete now redundant duplicated loops in _file_grep
676
        else:
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
677
            for line in file_text.splitlines():
678
                if pattern in line:
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
679
                    line = line.decode(file_encoding, 'replace')
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
680
                    writeline(line=line)
0.40.63 by Parth Malwankar
performance: moved conditionals out of core loop.
681
    else:
0.47.1 by Martin
Implement whole text search for fast failure on no match
682
        # Fast path on no match, the re module avoids bad behaviour in most
683
        # standard cases, but perhaps could try and detect backtracking
684
        # patterns here and avoid whole text search in those cases
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
685
        search = opts.patternc.search
7027.9.1 by Jelmer Vernooij
Fix all but one remaining grep tests.
686
        if b"$" not in pattern:
0.47.1 by Martin
Implement whole text search for fast failure on no match
687
            # GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
688
            #                through revisions as bazaar returns binary mode
689
            #                and trailing \r breaks $ as line ending match
690
            m = search(file_text)
691
            if m is None:
692
                return
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
693
            b = file_text.rfind(b"\n", 0, m.start()) + 1
0.47.1 by Martin
Implement whole text search for fast failure on no match
694
            if opts.line_number:
7027.3.3 by Jelmer Vernooij
Add some more bees; support writing both bytes and unicode strings in build_tree_contents.
695
                start = file_text.count(b"\n", 0, b) + 1
0.47.4 by Martin
Scale back no-match fast path to avoid some behaviour changes with line endings
696
            file_text = file_text[b:]
0.47.3 by Martin
Fix previously untested bug with regexp and line numbers introduced by optimisation
697
        else:
698
            start = 1
0.46.15 by Martin
Swap fixed_string/line_number branches in _file_grep
699
        if opts.line_number:
700
            for index, line in enumerate(file_text.splitlines()):
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
701
                if search(line):
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
702
                    line = line.decode(file_encoding, 'replace')
0.47.3 by Martin
Fix previously untested bug with regexp and line numbers introduced by optimisation
703
                    writeline(lineno=index+start, line=line)
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
704
        else:
705
            for line in file_text.splitlines():
0.46.16 by Martin
Save an attribute lookup on regexp object in inner loops
706
                if search(line):
0.40.137 by Parth Malwankar
(Martin [gz]) Add seperate output formatter
707
                    line = line.decode(file_encoding, 'replace')
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
708
                    writeline(line=line)
0.40.139 by Parth Malwankar
(Martin [gz]) Added fast path for no match that avoids splitting the
709