/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.40.10 by Parth Malwankar
assigned copyright to canonical
1
# Copyright (C) 2010 Canonical Ltd
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.47.1 by Martin
Implement whole text search for fast failure on no match
17
import os
18
import sys
19
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
20
from bzrlib.lazy_import import lazy_import
21
lazy_import(globals(), """
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
22
from fnmatch import fnmatch
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
23
import re
24
0.43.8 by Parth Malwankar
added color for regex pattern.
25
from termcolor import color_string, re_color_string, FG
0.43.4 by Parth Malwankar
initial support for color for fixed string grep.
26
27
0.41.12 by Parth Malwankar
initial support for working tree grep (no test cases yet!)
28
from bzrlib import bzrdir
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
29
from bzrlib.workingtree import WorkingTree
0.40.95 by Parth Malwankar
faster mainline rev grep
30
from bzrlib.revisionspec import RevisionSpec, RevisionSpec_revid, RevisionSpec_revno
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
31
from bzrlib import (
32
    errors,
33
    lazy_regex,
0.40.47 by Parth Malwankar
fixes bug #531336. binary files are now skipped.
34
    osutils,
35
    textfile,
36
    trace,
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
37
    )
38
""")
39
0.40.83 by Parth Malwankar
added support for -F/--fixed-string.
40
_terminal_encoding = osutils.get_terminal_encoding()
41
_user_encoding = osutils.get_user_encoding()
42
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
43
0.40.95 by Parth Malwankar
faster mainline rev grep
44
class _RevisionNotLinear(Exception):
45
    """Raised when a revision is not on left-hand history."""
46
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
47
0.40.95 by Parth Malwankar
faster mainline rev grep
48
def _rev_on_mainline(rev_tuple):
49
    """returns True is rev tuple is on mainline"""
50
    if len(rev_tuple) == 1:
51
        return True
52
    return rev_tuple[1] == 0 and rev_tuple[2] == 0
53
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
54
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
55
# NOTE: _linear_view_revisions is based on
56
# bzrlib.log._linear_view_revisions.
57
# This should probably be a common public API
0.40.95 by Parth Malwankar
faster mainline rev grep
58
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
0.40.106 by Parth Malwankar
fixed error in dotted rev reverse search.
59
    # requires that start is older than end
0.40.95 by Parth Malwankar
faster mainline rev grep
60
    repo = branch.repository
61
    for revision_id in repo.iter_reverse_revision_history(end_rev_id):
62
        revno = branch.revision_id_to_dotted_revno(revision_id)
63
        revno_str = '.'.join(str(n) for n in revno)
64
        if revision_id == start_rev_id:
65
            yield revision_id, revno_str, 0
66
            break
67
        yield revision_id, revno_str, 0
68
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
69
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
70
# NOTE: _graph_view_revisions is copied from
71
# bzrlib.log._graph_view_revisions.
72
# This should probably be a common public API
73
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
74
                          rebase_initial_depths=True):
75
    """Calculate revisions to view including merges, newest to oldest.
76
77
    :param branch: the branch
78
    :param start_rev_id: the lower revision-id
79
    :param end_rev_id: the upper revision-id
80
    :param rebase_initial_depth: should depths be rebased until a mainline
81
      revision is found?
82
    :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
83
    """
0.40.106 by Parth Malwankar
fixed error in dotted rev reverse search.
84
    # requires that start is older than end
0.40.100 by Parth Malwankar
removed dependency on log._graph_view_revisions
85
    view_revisions = branch.iter_merge_sorted_revisions(
86
        start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
87
        stop_rule="with-merges")
88
    if not rebase_initial_depths:
89
        for (rev_id, merge_depth, revno, end_of_merge
90
             ) in view_revisions:
91
            yield rev_id, '.'.join(map(str, revno)), merge_depth
92
    else:
93
        # We're following a development line starting at a merged revision.
94
        # We need to adjust depths down by the initial depth until we find
95
        # a depth less than it. Then we use that depth as the adjustment.
96
        # If and when we reach the mainline, depth adjustment ends.
97
        depth_adjustment = None
98
        for (rev_id, merge_depth, revno, end_of_merge
99
             ) in view_revisions:
100
            if depth_adjustment is None:
101
                depth_adjustment = merge_depth
102
            if depth_adjustment:
103
                if merge_depth < depth_adjustment:
104
                    # From now on we reduce the depth adjustement, this can be
105
                    # surprising for users. The alternative requires two passes
106
                    # which breaks the fast display of the first revision
107
                    # though.
108
                    depth_adjustment = merge_depth
109
                merge_depth -= depth_adjustment
110
            yield rev_id, '.'.join(map(str, revno)), merge_depth
111
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
112
0.40.9 by Parth Malwankar
factored out grep related code to grep.py
113
def compile_pattern(pattern, flags=0):
    """Compile `pattern` into a regular expression object.

    :param pattern: the regular expression source.
    :param flags: flags passed through to ``re.compile``.
    :return: the compiled pattern object.
    :raises errors.BzrError: if `pattern` is not a valid regular expression.
    """
    try:
        # use python's re.compile as we need to catch re.error in case of
        # bad pattern
        lazy_regex.reset_compile()
        return re.compile(pattern, flags)
    except re.error:
        # The exception object itself is not needed; leaving it unbound also
        # keeps this clause valid on every Python version.
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
122
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
123
0.40.86 by Parth Malwankar
the check for implicit fixed_string now allows for spaces.
124
def is_fixed_string(s):
    """Tell whether `s` is made only of word characters and whitespace.

    :param s: the candidate search pattern.
    :return: True if every character of `s` is an ASCII letter, a digit, an
        underscore or whitespace (the empty string qualifies); False
        otherwise.
    """
    # Raw string so that "\s" reaches the regex engine as written instead of
    # relying on Python passing an unknown string escape through.
    return bool(re.match(r"^([A-Za-z0-9_]|\s)*$", s))
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
128
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
129
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
130
def versioned_grep(opts):
    """Grep the revision, or range of revisions, given by ``opts.revision``.

    The branch containing the current directory is opened and read-locked
    for the duration of the search. ``opts.revision`` holds one or two
    revision specs; with two, every revision between them is searched from
    newest to oldest.

    :param opts: options object. Read here: ``revision``, ``levels`` and
        ``path_list``. ``opts.outputter`` is set to a caching `_Outputter`.
    """
    wt, branch, relpath = \
        bzrdir.BzrDir.open_containing_tree_or_branch('.')
    branch.lock_read()
    try:
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            # no usable lower bound was given: fall back to revno 1
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                # no usable upper bound was given: use the branch tip
                end_revid = branch.last_revision_info()[1]
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(
                    branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(
                    branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # we don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string("revid:"+revid)
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                path_for_id = osutils.pathjoin(relpath, path)
                # named file_id so the builtin id() is not shadowed
                file_id = tree.path2id(path_for_id)
                if not file_id:
                    trace.warning("Skipped unknown file '%s'." % path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(tree, file_id, '.', path, opts, revno)
    finally:
        branch.unlock()
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
198
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
199
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
200
def workingtree_grep(opts):
    """Grep the files of the working tree containing the current directory.

    :param opts: options object. ``opts.print_revno`` is reset to None and
        ``opts.outputter`` is set to a non-caching `_Outputter`;
        ``opts.path_list`` names the files and directories to search.
    :raises errors.BzrCommandError: if no working tree is found.
    """
    revno = opts.print_revno = None # for working tree set revno to None

    tree, branch, relpath = \
        bzrdir.BzrDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
            'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    tree.lock_read()
    try:
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                # Close the file explicitly rather than leaving the handle
                # to be reclaimed whenever the garbage collector runs.
                f = open(path)
                try:
                    file_text = f.read()
                finally:
                    f.close()
                _file_grep(file_text, path, opts, revno)
    finally:
        tree.unlock()
0.41.11 by Parth Malwankar
moved top level grep code to versioned_grep.
223
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
224
0.40.74 by Parth Malwankar
optimization. --include/exclude are checked before reading the file.
225
def _skip_file(include, exclude, path):
226
    if include and not _path_in_glob_list(path, include):
227
        return True
228
    if exclude and _path_in_glob_list(path, exclude):
229
        return True
230
    return False
231
232
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
233
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep the versioned files that `tree` lists under a directory.

    :param tree: the tree to list files from.
    :param path: the directory to search, as given by the user.
    :param relpath: joined in front of `path` to obtain the directory to
        list from (presumably the path of the cwd inside the tree; verify
        against the callers).
    :param opts: options object. Read here: ``from_root``, ``recursive``,
        ``include``, ``exclude``, ``files_with_matches``,
        ``files_without_match`` and ``outputter``.
    :param revno: revno string of the revision being searched, or None when
        the working tree is being searched.
    :param path_prefix: passed through to the per-file grep functions.
    """
    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching from the root of the tree
        # NOTE(review): the previous code also set an unused local
        # ``recursive = True`` here; recursion is governed by opts.recursive
        # only. Confirm whether --from-root is meant to force recursion.
        from_dir = None

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, entry in tree.list_files(include_root=False,
        from_dir=from_dir, recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.inventory[fid].revision
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((fid, (fp, fid)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read
                    # the entire file
                    wt_file = open(path_for_file, 'r', buffering=4096)
                    try:
                        _file_grep_list_only_wtree(wt_file, fp, opts,
                            path_prefix)
                    finally:
                        wt_file.close()
                else:
                    wt_file = open(path_for_file, 'r')
                    try:
                        file_text = wt_file.read()
                    finally:
                        wt_file.close()
                    _file_grep(file_text, fp, opts, revno, path_prefix)

    if revno is not None: # grep versioned files
        for (path, fid), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            # The content arrives as an iterable of byte strings whose number
            # and size are unspecified, so join them all rather than
            # grepping only the first chunk.
            _file_grep(''.join(chunks), path, opts, revno, path_prefix,
                tree.inventory[fid].revision)
0.40.43 by Parth Malwankar
moved cmd_grep._grep_dir to grep.dir_grep
290
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
291
0.41.8 by Parth Malwankar
code cleanup.
292
def _make_display_path(relpath, path):
293
    """Return path string relative to user cwd.
0.40.42 by Parth Malwankar
fix to make grep paths relative to cwd
294
0.41.8 by Parth Malwankar
code cleanup.
295
    Take tree's 'relpath' and user supplied 'path', and return path
296
    that can be displayed to the user.
297
    """
0.40.15 by Parth Malwankar
some fixes and test updates
298
    if relpath:
0.40.52 by Parth Malwankar
code cleanup and documentation
299
        # update path so to display it w.r.t cwd
300
        # handle windows slash separator
0.40.20 by Parth Malwankar
used path functions from bzrlib.osutils
301
        path = osutils.normpath(osutils.pathjoin(relpath, path))
0.40.22 by Parth Malwankar
fixed display path formatting on windows
302
        path = path.replace('\\', '/')
303
        path = path.replace(relpath + '/', '', 1)
0.41.8 by Parth Malwankar
code cleanup.
304
    return path
305
306
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
307
def versioned_file_grep(tree, id, relpath, path, opts, revno, path_prefix = None):
    """Fetch the text of file `id` from `tree` and hand it to _file_grep.

    The path is converted with _make_display_path before the file text is
    read; `opts`, `revno` and `path_prefix` are passed through unchanged.
    """
    display_path = _make_display_path(relpath, path)
    _file_grep(tree.get_file_text(id), display_path, opts, revno, path_prefix)
0.41.21 by Parth Malwankar
include/exclude working now. tests not added.
314
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
315
0.41.21 by Parth Malwankar
include/exclude working now. tests not added.
316
def _path_in_glob_list(path, glob_list):
317
    for glob in glob_list:
318
        if fnmatch(path, glob):
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
319
            return True
320
    return False
0.41.12 by Parth Malwankar
initial support for working tree grep (no test cases yet!)
321
0.40.117 by Parth Malwankar
cosmetic fix. added two lines between top level functions.
322
0.46.7 by Martin
Move line writing function up the stack so it lasts the whole operation, and clean up some params
323
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
324
    # test and skip binary files
325
    if '\x00' in file.read(1024):
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
326
        if opts.verbose:
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
327
            trace.warning("Binary file '%s' skipped." % path)
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
328
        return
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
329
330
    file.seek(0) # search from beginning
331
332
    found = False
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
333
    if opts.fixed_string:
334
        pattern = opts.pattern.encode(_user_encoding, 'replace')
0.46.1 by Martin
Make -Fi use regexps for re.IGNORECASE rather than double str.lower
335
        for line in file:
336
            if pattern in line:
337
                found = True
338
                break
0.40.121 by Parth Malwankar
initial implementation of -L/--files-without-matches. no tests.
339
    else: # not fixed_string
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
340
        for line in file:
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
341
            if opts.patternc.search(line):
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
342
                found = True
0.40.116 by Parth Malwankar
optimization for wtree list-only grep to avoid full file read.
343
                break
344
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
345
    if (opts.files_with_matches and found) or \
346
        (opts.files_without_match and not found):
0.40.118 by Parth Malwankar
further optimization of _file_grep_list_only_wtree.
347
        if path_prefix and path_prefix != '.':
348
            # user has passed a dir arg, show that as result prefix
349
            path = osutils.pathjoin(path_prefix, path)
0.46.18 by Martin
Fix another, previously existing issue with colour and match-only
350
        opts.outputter.get_writer(path, None, None)()
0.43.1 by Parth Malwankar
added GrepOptions object for easy parameter passing
351
352
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
353
class _Outputter(object):
354
    """Precalculate formatting based on options given
355
356
    The idea here is to do this work only once per run, and finally return a
357
    function that will do the minimum amount possible for each match.
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
358
    """
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
359
    def __init__(self, opts, use_cache=False):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
360
        self.outf = opts.outf
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
361
        if use_cache:
362
            # self.cache is used to cache results for dir grep based on fid.
363
            # If the fid is does not change between results, it means that
364
            # the result will be the same apart from revno. In such a case
365
            # we avoid getting file chunks from repo and grepping. The result
366
            # is just printed by replacing old revno with new one.
367
            self.cache = {}
368
        else:
369
            self.cache = None
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
370
        no_line = opts.files_with_matches or opts.files_without_match
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
371
372
        if opts.show_color:
373
            pat = opts.pattern.encode(_user_encoding, 'replace')
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
374
            if no_line:
375
                self.get_writer = self._get_writer_plain
376
            elif opts.fixed_string:
377
                self._old = pat
378
                self._new = color_string(pat, FG.BOLD_RED)
379
                self.get_writer = self._get_writer_fixed_highlighted
380
            else:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
381
                flags = opts.patternc.flags
382
                self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
383
                self._highlight = color_string("\\1", FG.BOLD_RED)
384
                self.get_writer = self._get_writer_regexp_highlighted
385
            path_start = FG.MAGENTA
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
386
            path_end = FG.NONE
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
387
            sep = color_string(':', FG.BOLD_CYAN)
388
            rev_sep = color_string('~', FG.BOLD_YELLOW)
389
        else:
390
            self.get_writer = self._get_writer_plain
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
391
            path_start = path_end = ""
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
392
            sep = ":"
393
            rev_sep = "~"
394
395
        parts = [path_start, "%(path)s"]
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
396
        if opts.print_revno:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
397
            parts.extend([rev_sep, "%(revno)s"])
0.46.13 by Martin
Split format string into two parts for non-cached operations too
398
        self._format_initial = "".join(parts)
399
        parts = []
0.46.17 by Martin
Fix previously untested issue with colour and match-only, and test a related issue
400
        if no_line:
401
            if not opts.print_revno:
402
                parts.append(path_end)
403
        else:
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
404
            if opts.line_number:
0.46.13 by Martin
Split format string into two parts for non-cached operations too
405
                parts.extend([sep, "%(lineno)s"])
406
            parts.extend([sep, "%(line)s"])
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
407
        parts.append(opts.eol_marker)
0.46.19 by Martin
Minor pokes, fixes a bug with working tree optimisation and binary files
408
        self._format_perline = "".join(parts)
0.46.7 by Martin
Move line writing function up the stack so it lasts the whole operation, and clean up some params
409
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
410
    def _get_writer_plain(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
411
        """Get function for writing uncoloured output"""
0.46.13 by Martin
Split format string into two parts for non-cached operations too
412
        per_line = self._format_perline
413
        start = self._format_initial % {"path":path, "revno":revno}
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
414
        write = self.outf.write
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
415
        if self.cache is not None and cache_id is not None:
416
            result_list = []
417
            self.cache[cache_id] = path, result_list
418
            add_to_cache = result_list.append
419
            def _line_cache_and_writer(**kwargs):
420
                """Write formatted line and cache arguments"""
0.46.12 by Martin
Split format string for cache to only store a string, not a dict
421
                end = per_line % kwargs
422
                add_to_cache(end)
423
                write(start + end)
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
424
            return _line_cache_and_writer
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
425
        def _line_writer(**kwargs):
426
            """Write formatted line from arguments given by underlying opts"""
0.46.13 by Martin
Split format string into two parts for non-cached operations too
427
            write(start + per_line % kwargs)
0.46.8 by Martin
Move pattern highlighting out of _file_grep and into the line writing code
428
        return _line_writer
429
0.46.11 by Martin
Add method to outputter for writing cached lines
430
    def write_cached_lines(self, cache_id, revno):
        """Replay the lines cached under ``cache_id`` for another revision.

        The cached entry supplies the path and the already formatted line
        endings; only the path/revno prefix is rebuilt, using ``revno``.
        Each line is written to ``self.outf`` in the order it was cached.
        """
        path, line_endings = self.cache[cache_id]
        prefix = self._format_initial % dict(path=path, revno=revno)
        emit = self.outf.write
        for ending in line_endings:
            emit(prefix + ending)
0.46.11 by Martin
Add method to outputter for writing cached lines
437
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
438
    def _get_writer_regexp_highlighted(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
439
        """Get function for writing output with regexp match highlighted"""
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
440
        _line_writer = self._get_writer_plain(path, revno, cache_id)
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
441
        sub, highlight = self._sub, self._highlight
0.46.8 by Martin
Move pattern highlighting out of _file_grep and into the line writing code
442
        def _line_writer_regexp_highlighted(line, **kwargs):
443
            """Write formatted line with matched pattern highlighted"""
444
            return _line_writer(line=sub(highlight, line), **kwargs)
445
        return _line_writer_regexp_highlighted
446
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
447
    def _get_writer_fixed_highlighted(self, path, revno, cache_id):
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
448
        """Get function for writing output with search string highlighted"""
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
449
        _line_writer = self._get_writer_plain(path, revno, cache_id)
0.46.9 by Martin
Give in and make formatter a class so path and revno only need to be passed once per file
450
        old, new = self._old, self._new
451
        def _line_writer_fixed_highlighted(line, **kwargs):
452
            """Write formatted line with string searched for highlighted"""
453
            return _line_writer(line=line.replace(old, new), **kwargs)
454
        return _line_writer_fixed_highlighted
0.46.3 by Martin
Start moving formatting setup out of _file_grep, only for files_with_matches so far
455
456
0.46.10 by Martin
Move caching mechanism onto outputter rather than passing around dicts and lists
457
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
    """Search the text of one file and write any results via the outputter.

    :param file_text: Content of the file. It is searched with the encoded
        pattern and matching lines are decoded before being written, so it
        is expected to be a byte string.
    :param path: Path of the file, used in output and in the binary-file
        warning.
    :param opts: Options object. This function reads ``verbose``,
        ``pattern``, ``patternc``, ``outputter``, ``files_with_matches``,
        ``files_without_match``, ``fixed_string`` and ``line_number``.
    :param revno: Revision number, passed through to the outputter.
    :param path_prefix: Optional directory prefix. Unless it is empty or
        ``'.'`` it is joined in front of ``path`` for display.
    :param cache_id: Passed through to ``opts.outputter.get_writer``.
    :return: None. Output is produced by calling the writer obtained from
        ``opts.outputter``.
    """
    # test and skip binary files
    if '\x00' in file_text[:1024]:
        if opts.verbose:
            trace.warning("Binary file '%s' skipped." % path)
        return

    if path_prefix and path_prefix != '.':
        # user has passed a dir arg, show that as result prefix
        path = osutils.pathjoin(path_prefix, path)

    # GZ 2010-06-07: There's no actual guarantee the file contents will be in
    #                the user encoding, but we have to guess something and it
    #                is a reasonable default without a better mechanism.
    file_encoding = _user_encoding
    pattern = opts.pattern.encode(_user_encoding, 'replace')

    writeline = opts.outputter.get_writer(path, revno, cache_id)

    if opts.files_with_matches or opts.files_without_match:
        # Only whether the file matches matters here, so stop at first hit.
        if opts.fixed_string:
            # This must test the interpreter version. Comparing sys.platform
            # (a string) with a tuple is never true on Python 2, which left
            # the whole-text search unreachable, and raises TypeError on
            # Python 3.
            if sys.version_info > (2, 5):
                found = pattern in file_text
            else:
                for line in file_text.splitlines():
                    if pattern in line:
                        found = True
                        break
                else:
                    found = False
        else:
            search = opts.patternc.search
            # Whole-text search is only used when the pattern has no "$",
            # so that end-of-line matching keeps its per-line behaviour.
            if "$" not in pattern:
                found = search(file_text) is not None
            else:
                for line in file_text.splitlines():
                    if search(line):
                        found = True
                        break
                else:
                    found = False
        if (opts.files_with_matches and found) or \
                (opts.files_without_match and not found):
            writeline()
    elif opts.fixed_string:
        # Fast path for no match, search through the entire file at once rather
        # than a line at a time. However, we don't want this without Python 2.5
        # as the quick string search algorithm wasn't implemented till then:
        # <http://effbot.org/zone/stringlib.htm>
        if sys.version_info > (2, 5):
            i = file_text.find(pattern)
            if i == -1:
                return
            # b is the offset of the start of the line holding the first
            # match; everything before it is known not to match.
            b = file_text.rfind("\n", 0, i) + 1
            if opts.line_number:
                # Line number of that first matching line, so numbering
                # stays correct after the text is trimmed below.
                start = file_text.count("\n", 0, b) + 1
            file_text = file_text[b:]
        else:
            start = 1
        if opts.line_number:
            for index, line in enumerate(file_text.splitlines()):
                if pattern in line:
                    line = line.decode(file_encoding, 'replace')
                    writeline(lineno=index+start, line=line)
        else:
            for line in file_text.splitlines():
                if pattern in line:
                    line = line.decode(file_encoding, 'replace')
                    writeline(line=line)
    else:
        # Fast path on no match, the re module avoids bad behaviour in most
        # standard cases, but perhaps could try and detect backtracking
        # patterns here and avoid whole text search in those cases
        search = opts.patternc.search
        if "$" not in pattern:
            # GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
            #                through revisions as bazaar returns binary mode
            #                and trailing \r breaks $ as line ending match
            m = search(file_text)
            if m is None:
                return
            # As in the fixed string case: skip straight to the line that
            # holds the first match and remember its line number.
            b = file_text.rfind("\n", 0, m.start()) + 1
            if opts.line_number:
                start = file_text.count("\n", 0, b) + 1
            file_text = file_text[b:]
        else:
            start = 1
        if opts.line_number:
            for index, line in enumerate(file_text.splitlines()):
                if search(line):
                    line = line.decode(file_encoding, 'replace')
                    writeline(lineno=index+start, line=line)
        else:
            for line in file_text.splitlines():
                if search(line):
                    line = line.decode(file_encoding, 'replace')
                    writeline(line=line)
0.40.139 by Parth Malwankar
(Martin [gz]) Added fast path for no match that avoids splitting the
554