/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/diff.py

  • Committer: Jelmer Vernooij
  • Date: 2019-10-20 15:03:13 UTC
  • mto: This revision was merged to the branch mainline in revision 7407.
  • Revision ID: jelmer@jelmer.uk-20191020150313-q06o6pncwr6ndu3t
Fix send with git.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2005-2014 Canonical Ltd.
 
2
#
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
#
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
#
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
16
 
 
17
from __future__ import absolute_import
 
18
 
 
19
import difflib
 
20
import os
 
21
import re
 
22
import sys
 
23
 
 
24
from .lazy_import import lazy_import
 
25
lazy_import(globals(), """
 
26
import errno
 
27
import patiencediff
 
28
import subprocess
 
29
import tempfile
 
30
 
 
31
from breezy import (
 
32
    cleanup,
 
33
    controldir,
 
34
    errors,
 
35
    osutils,
 
36
    textfile,
 
37
    timestamp,
 
38
    views,
 
39
    )
 
40
 
 
41
from breezy.workingtree import WorkingTree
 
42
from breezy.i18n import gettext
 
43
""")
 
44
 
 
45
from .registry import (
 
46
    Registry,
 
47
    )
 
48
from .sixish import text_type
 
49
from .trace import mutter, note, warning
 
50
from .tree import FileTimestampUnavailable
 
51
 
 
52
 
 
53
DEFAULT_CONTEXT_AMOUNT = 3
 
54
 
 
55
 
 
56
# TODO: Rather than building a changeset object, we should probably
 
57
# invoke callbacks on an object.  That object can either accumulate a
 
58
# list, write them out directly, etc etc.
 
59
 
 
60
 
 
61
class _PrematchedMatcher(difflib.SequenceMatcher):
 
62
    """Allow SequenceMatcher operations to use predetermined blocks"""
 
63
 
 
64
    def __init__(self, matching_blocks):
 
65
        difflib.SequenceMatcher(self, None, None)
 
66
        self.matching_blocks = matching_blocks
 
67
        self.opcodes = None
 
68
 
 
69
 
 
70
def internal_diff(old_label, oldlines, new_label, newlines, to_file,
                  allow_binary=False, sequence_matcher=None,
                  path_encoding='utf8', context_lines=DEFAULT_CONTEXT_AMOUNT):
    """Write a unified diff of two line lists to ``to_file``.

    :param old_label: Text label for the old side; encoded with
        ``path_encoding`` (errors replaced) for the ``---`` header.
    :param oldlines: List of byte lines for the old content.
    :param new_label: Text label for the new side (``+++`` header).
    :param newlines: List of byte lines for the new content.
    :param to_file: Binary stream the diff is written to.
    :param allow_binary: When exactly ``False``, both inputs are passed to
        textfile.check_text_lines() before diffing.
    :param sequence_matcher: Matcher class; defaults to
        patiencediff.PatienceSequenceMatcher.
    :param path_encoding: Encoding applied to the labels.
    :param context_lines: Number of context lines per hunk.
    """
    # FIXME: difflib is wrong if there is no trailing newline.
    # The syntax used by patch seems to be "\ No newline at
    # end of file" following the last diff line from that
    # file.  This is not trivial to insert into the
    # unified_diff output and it might be better to just fix
    # or replace that function.

    # In the meantime we at least make sure the patch isn't
    # mangled.

    if allow_binary is False:
        for candidate in (oldlines, newlines):
            textfile.check_text_lines(candidate)

    matcher_cls = sequence_matcher
    if matcher_cls is None:
        matcher_cls = patiencediff.PatienceSequenceMatcher

    diff_lines = list(unified_diff_bytes(
        oldlines, newlines,
        fromfile=old_label.encode(path_encoding, 'replace'),
        tofile=new_label.encode(path_encoding, 'replace'),
        n=context_lines, sequencematcher=matcher_cls))
    if not diff_lines:
        # Identical contents, nothing to do
        return

    # work-around for difflib being too smart for its own good
    # if /dev/null is "1,0", patch won't recognize it as /dev/null
    # (index 2 is the first hunk header, after the two file headers)
    if not oldlines:
        diff_lines[2] = diff_lines[2].replace(b'-1,0', b'-0,0')
    elif not newlines:
        diff_lines[2] = diff_lines[2].replace(b'+1,0', b'+0,0')

    for chunk in diff_lines:
        to_file.write(chunk)
        if not chunk.endswith(b'\n'):
            to_file.write(b"\n\\ No newline at end of file\n")
    to_file.write(b'\n')
 
110
 
 
111
 
 
112
def unified_diff_bytes(a, b, fromfile=b'', tofile=b'', fromfiledate=b'',
                       tofiledate=b'', n=3, lineterm=b'\n', sequencematcher=None):
    r"""Generate a unified diff between two sequences of byte lines.

    This is a generator.  Nothing is yielded when the sequences compare
    equal; otherwise a ``---`` line and a ``+++`` line are yielded once,
    followed by an ``@@`` header and the body lines for each hunk.

    Control lines (``---``, ``+++`` and ``@@``) end with ``lineterm``.
    Body lines are the input lines with a one-byte prefix and are never
    given a terminator, so inputs without trailing newlines should be
    diffed with ``lineterm=b''`` to get uniformly newline-free output.

    :param a: Sequence of byte lines for the old side.
    :param b: Sequence of byte lines for the new side.
    :param fromfile: Name shown on the ``---`` line.
    :param tofile: Name shown on the ``+++`` line.
    :param fromfiledate: If non-empty, appended to ``fromfile`` after a tab.
    :param tofiledate: If non-empty, appended to ``tofile`` after a tab.
    :param n: Number of context lines around each change.
    :param lineterm: Terminator for the control lines.
    :param sequencematcher: Matcher class called as ``cls(None, a, b)``;
        defaults to difflib.SequenceMatcher.

    Example::

        for line in unified_diff_bytes(
                b'one two three four'.split(),
                b'zero one tree four'.split(),
                b'Original', b'Current', lineterm=b''):
            print(line.decode())

    prints::

        --- Original
        +++ Current
        @@ -1,4 +1,4 @@
        +zero
         one
        -two
        -three
        +tree
         four
    """
    matcher_cls = sequencematcher
    if matcher_cls is None:
        matcher_cls = difflib.SequenceMatcher

    from_stamp = b'\t' + bytes(fromfiledate) if fromfiledate else fromfiledate
    to_stamp = b'\t' + bytes(tofiledate) if tofiledate else tofiledate

    header_pending = True
    for hunk in matcher_cls(None, a, b).get_grouped_opcodes(n):
        if header_pending:
            # File headers are emitted lazily so equal inputs yield nothing.
            yield b'--- %s%s%s' % (fromfile, from_stamp, lineterm)
            yield b'+++ %s%s%s' % (tofile, to_stamp, lineterm)
            header_pending = False

        first_op, last_op = hunk[0], hunk[-1]
        old_lo, old_hi = first_op[1], last_op[2]
        new_lo, new_hi = first_op[3], last_op[4]
        yield b"@@ -%d,%d +%d,%d @@%s" % (
            old_lo + 1, old_hi - old_lo, new_lo + 1, new_hi - new_lo, lineterm)

        for op, a_lo, a_hi, b_lo, b_hi in hunk:
            if op == 'equal':
                for text in a[a_lo:a_hi]:
                    yield b' ' + text
            else:
                if op in ('replace', 'delete'):
                    for text in a[a_lo:a_hi]:
                        yield b'-' + text
                if op in ('replace', 'insert'):
                    for text in b[b_lo:b_hi]:
                        yield b'+' + text
 
179
 
 
180
 
 
181
def _spawn_external_diff(diffcmd, capture_errors=True):
 
182
    """Spawn the external diff process, and return the child handle.
 
183
 
 
184
    :param diffcmd: The command list to spawn
 
185
    :param capture_errors: Capture stderr as well as setting LANG=C
 
186
        and LC_ALL=C. This lets us read and understand the output of diff,
 
187
        and respond to any errors.
 
188
    :return: A Popen object.
 
189
    """
 
190
    if capture_errors:
 
191
        # construct minimal environment
 
192
        env = {}
 
193
        path = os.environ.get('PATH')
 
194
        if path is not None:
 
195
            env['PATH'] = path
 
196
        env['LANGUAGE'] = 'C'   # on win32 only LANGUAGE has effect
 
197
        env['LANG'] = 'C'
 
198
        env['LC_ALL'] = 'C'
 
199
        stderr = subprocess.PIPE
 
200
    else:
 
201
        env = None
 
202
        stderr = None
 
203
 
 
204
    try:
 
205
        pipe = subprocess.Popen(diffcmd,
 
206
                                stdin=subprocess.PIPE,
 
207
                                stdout=subprocess.PIPE,
 
208
                                stderr=stderr,
 
209
                                env=env)
 
210
    except OSError as e:
 
211
        if e.errno == errno.ENOENT:
 
212
            raise errors.NoDiff(str(e))
 
213
        raise
 
214
 
 
215
    return pipe
 
216
 
 
217
 
 
218
# diff style options as of GNU diff v3.2
style_option_list = [
    '-c', '-C', '--context',
    '-e', '--ed',
    '-f', '--forward-ed',
    '-q', '--brief',
    '--normal',
    '-n', '--rcs',
    '-u', '-U', '--unified',
    '-y', '--side-by-side',
    '-D', '--ifdef',
]


def default_style_unified(diff_opts):
    """Add '-u' to diff_opts unless an output style is already requested.

        diff accepts only a single style option; later ones do not override
        earlier ones.  Some style options take an argument that may be
        glued directly onto the option, so an entry counts as a style
        option when it merely *starts with* one of style_option_list.
        This is a rough prefix check, not a real parser of diff's grammar.

    :param diff_opts: List of options for external (GNU) diff.  It is
        modified in place when '-u' has to be added.
    :return: The same list, with default style=='unified'.
    """
    style_prefixes = tuple(style_option_list)
    style_requested = any(
        option.startswith(style_prefixes) for option in diff_opts)
    if not style_requested:
        diff_opts.append('-u')
    return diff_opts
 
252
 
 
253
 
 
254
def external_diff(old_label, oldlines, new_label, newlines, to_file,
                  diff_opts):
    """Display a diff by calling out to the external diff program.

    Both line lists are written to temporary files, ``diff`` is run on them
    with ``--label`` set to the given labels, and its output (plus one
    trailing newline, for consistency with internal_diff) is written to
    ``to_file``.  The temporary files are removed afterwards.

    :param old_label: Label passed to diff for the old file.
    :param oldlines: Byte lines of the old content.
    :param new_label: Label passed to diff for the new file.
    :param newlines: Byte lines of the new content.
    :param to_file: Binary stream receiving the output.
    :param diff_opts: Extra options for diff; a falsy value means none.
        '-u' is added by default_style_unified() when no style is given.
    :raises errors.BzrError: if diff exits with a status other than 0 or 1
        (or is killed by a signal), unless status 2 was only reporting
        that the files differ as binary files.
    """
    # make sure our own output is properly ordered before the diff
    to_file.flush()

    old_fd, old_abspath = tempfile.mkstemp(prefix='brz-diff-old-')
    new_fd, new_abspath = tempfile.mkstemp(prefix='brz-diff-new-')
    old_handle = os.fdopen(old_fd, 'wb')
    new_handle = os.fdopen(new_fd, 'wb')

    def _remove_temp(path):
        # Warn in case the file couldn't be deleted (in case windows still
        # holds the file open, but not if the files have already been
        # deleted)
        try:
            os.remove(path)
        except OSError as e:
            if e.errno not in (errno.ENOENT,):
                warning('Failed to delete temporary file: %s %s', path, e)

    try:
        # TODO: perhaps a special case for comparing to or from the empty
        # sequence; can just use /dev/null on Unix

        # TODO: if either of the files being compared already exists as a
        # regular named file (e.g. in the working directory) then we can
        # compare directly to that, rather than copying it.

        old_handle.writelines(oldlines)
        new_handle.writelines(newlines)

        old_handle.close()
        new_handle.close()

        if not diff_opts:
            diff_opts = []
        if sys.platform == 'win32':
            # Popen doesn't do the proper encoding for external commands
            # Since we are dealing with an ANSI api, use mbcs encoding
            old_label = old_label.encode('mbcs')
            new_label = new_label.encode('mbcs')
        diffcmd = ['diff']
        diffcmd += ['--label', old_label, old_abspath]
        diffcmd += ['--label', new_label, new_abspath]
        diffcmd.append('--binary')

        diff_opts = default_style_unified(diff_opts)
        if diff_opts:
            diffcmd.extend(diff_opts)

        child = _spawn_external_diff(diffcmd, capture_errors=True)
        out, err = child.communicate()
        rc = child.returncode

        # internal_diff() adds a trailing newline, add one here for consistency
        out += b'\n'
        if rc == 2:
            # 'diff' gives retcode == 2 for all sorts of errors
            # one of those is 'Binary files differ'.
            # Bad options could also be the problem.
            # 'Binary files' is not a real error, so we suppress that error.
            lang_c_out = out

            # Since we got here, we want to make sure to give an i18n error
            child = _spawn_external_diff(diffcmd, capture_errors=False)
            out, err = child.communicate()

            # Write out the new i18n diff response
            to_file.write(out + b'\n')
            if child.returncode != 2:
                raise errors.BzrError(
                    'external diff failed with exit code 2'
                    ' when run with LANG=C and LC_ALL=C,'
                    ' but not when run natively: %r' % (diffcmd,))

            first_line = lang_c_out.partition(b'\n')[0]
            # Starting with diffutils 2.8.4 the word "binary" was dropped.
            if re.match(b'^(binary )?files.*differ$', first_line,
                        re.I) is not None:
                # Binary files differ, just return
                return
            raise errors.BzrError('external diff failed with exit code 2;'
                                  ' command: %r' % (diffcmd,))

        # If we got to here, we haven't written out the output of diff
        # do so now
        to_file.write(out)
        if rc not in (0, 1):
            # returns 1 if files differ; that's OK
            if rc < 0:
                reason = 'signal %d' % (-rc)
            else:
                reason = 'exit code %d' % rc

            raise errors.BzrError('external diff failed with %s; command: %r'
                                  % (reason, diffcmd))

    finally:
        # Closing again is harmless if the handles were already closed.
        old_handle.close()                 # and delete
        new_handle.close()

        _remove_temp(old_abspath)
        _remove_temp(new_abspath)
 
363
 
 
364
 
 
365
def get_trees_and_branches_to_diff_locked(
        path_list, revision_specs, old_url, new_url, exit_stack, apply_view=True):
    """Work out which trees, branches and files a diff request refers to.

    :param path_list:
        the list of arguments passed to the diff command
    :param revision_specs:
        Zero, one or two RevisionSpecs from the diff command line,
        saying what revisions to compare.
    :param old_url:
        The url of the old branch or tree. If None, it is taken from the
        first revision spec's branch, else from the first path, if any,
        or the current working tree.
    :param new_url:
        The url of the new branch or tree. If None, it is taken from the
        second revision spec's branch, else from the first path, if any,
        or the current working tree.
    :param exit_stack:
        an ExitStack object. Read locks taken here are entered on it, so
        running its cleanups unlocks the trees and branches.
    :param apply_view:
        if True and a view is set, apply the view or check that the paths
        are within it
    :returns:
        a tuple of (old_tree, new_tree, old_branch, new_branch,
        specific_files, extra_trees) where extra_trees is a sequence of
        additional trees to search in for file-ids.  The trees and branches
        will be read-locked until the cleanups registered via the exit_stack
        param are run.
    """
    # Get the old and new revision specs
    old_revision_spec = new_revision_spec = None
    if revision_specs is not None:
        if len(revision_specs) > 0:
            old_revision_spec = revision_specs[0]
            if old_url is None:
                old_url = old_revision_spec.get_branch()
        if len(revision_specs) > 1:
            new_revision_spec = revision_specs[1]
            if new_url is None:
                new_url = new_revision_spec.get_branch()

    other_paths = []
    make_paths_wt_relative = True
    consider_relpath = True
    if not path_list:
        # If no path is given, the current working tree is used
        default_location = u'.'
        consider_relpath = False
    elif old_url is not None and new_url is not None:
        # Both locations are explicit, so default_location is never needed.
        other_paths = path_list
        make_paths_wt_relative = False
    else:
        default_location = path_list[0]
        other_paths = path_list[1:]

    specific_files = []

    def open_and_lock(url):
        """Open ``url``, read-lock it and record its relpath if relevant."""
        wt, br, relpath = \
            controldir.ControlDir.open_containing_tree_or_branch(url)
        # Prefer locking the working tree; fall back to the branch.
        if wt is not None:
            exit_stack.enter_context(wt.lock_read())
        elif br is not None:
            exit_stack.enter_context(br.lock_read())
        if consider_relpath and relpath != '':
            if wt is not None and apply_view:
                views.check_path_in_view(wt, relpath)
            specific_files.append(relpath)
        return wt, br

    # Get the old location
    if old_url is None:
        old_url = default_location
    working_tree, branch = open_and_lock(old_url)
    old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch)
    old_branch = branch

    # Get the new location
    if new_url is None:
        new_url = default_location
    if new_url != old_url:
        working_tree, branch = open_and_lock(new_url)
    new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch,
                                 basis_is_default=working_tree is None)
    new_branch = branch

    # Get the specific files (all files is None, no files is [])
    if make_paths_wt_relative and working_tree is not None:
        other_paths = working_tree.safe_relpath_files(
            other_paths,
            apply_view=apply_view)
    specific_files.extend(other_paths)
    if not specific_files:
        specific_files = None
        if (working_tree is not None and working_tree.supports_views() and
                apply_view):
            view_files = working_tree.views.lookup_view()
            if view_files:
                specific_files = view_files
                view_str = views.view_display_str(view_files)
                note(gettext("*** Ignoring files outside view. View is %s") % view_str)

    # Get extra trees that ought to be searched for file-ids
    if working_tree is not None and working_tree not in (old_tree, new_tree):
        extra_trees = (working_tree,)
    else:
        extra_trees = None
    return (old_tree, new_tree, old_branch, new_branch,
            specific_files, extra_trees)
 
480
 
 
481
 
 
482
def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True):
 
483
    if branch is None and tree is not None:
 
484
        branch = tree.branch
 
485
    if spec is None or spec.spec is None:
 
486
        if basis_is_default:
 
487
            if tree is not None:
 
488
                return tree.basis_tree()
 
489
            else:
 
490
                return branch.basis_tree()
 
491
        else:
 
492
            return tree
 
493
    return spec.as_tree(branch)
 
494
 
 
495
 
 
496
def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
                    external_diff_options=None,
                    old_label='a/', new_label='b/',
                    extra_trees=None,
                    path_encoding='utf8',
                    using=None,
                    format_cls=None,
                    context=DEFAULT_CONTEXT_AMOUNT):
    """Show in text form the changes from one tree to another.

    All trees involved are read-locked (old tree, then any extra trees,
    then the new tree) for the duration of the call.

    :param to_file: The output stream.
    :param specific_files: Include only changes to these files - None for all
        changes.
    :param external_diff_options: If set, use an external GNU diff and pass
        these options.
    :param extra_trees: If set, more Trees to use for looking up file ids
    :param path_encoding: If set, the path will be encoded as specified,
        otherwise is supposed to be utf8
    :param using: Passed through to ``format_cls.from_trees_options``.
    :param format_cls: Formatter class (DiffTree subclass); DiffTree when
        None.
    :param context: Number of context lines; None means
        DEFAULT_CONTEXT_AMOUNT.
    :return: Whatever the formatter's ``show_diff`` returns.
    """
    context_lines = DEFAULT_CONTEXT_AMOUNT if context is None else context
    formatter_cls = DiffTree if format_cls is None else format_cls
    with cleanup.ExitStack() as exit_stack:
        lock_order = [old_tree]
        if extra_trees is not None:
            lock_order.extend(extra_trees)
        lock_order.append(new_tree)
        for tree in lock_order:
            exit_stack.enter_context(tree.lock_read())
        differ = formatter_cls.from_trees_options(
            old_tree, new_tree, to_file, path_encoding,
            external_diff_options, old_label, new_label, using,
            context_lines=context_lines)
        return differ.show_diff(specific_files, extra_trees)
 
532
 
 
533
 
 
534
def _patch_header_date(tree, path):
    """Format the modification time of ``path`` for a patch header.

    If the tree raises FileTimestampUnavailable, a timestamp of 0 is
    formatted instead.
    """
    try:
        modified = tree.get_file_mtime(path)
    except FileTimestampUnavailable:
        modified = 0
    return timestamp.format_patch_date(modified)
 
541
 
 
542
 
 
543
def get_executable_change(old_is_x, new_is_x):
    """Describe a change of the executable bit.

    :param old_is_x: Old executable flag: True, False or None (unknown).
    :param new_is_x: New executable flag: True, False or None (unknown).
    :return: An empty list when the flags are equal, otherwise a
        one-element list such as ``[b"+x to -x"]``.
    """
    if old_is_x == new_is_x:
        return []
    flag_text = {True: b"+x", False: b"-x", None: b"??"}
    before, after = flag_text[old_is_x], flag_text[new_is_x]
    return [b"%s to %s" % (before, after)]
 
549
 
 
550
 
 
551
class DiffPath(object):
    """Base type for command objects that compare files.

    Differ objects are expected to offer a
    ``diff(old_path, new_path, old_kind, new_kind)`` method (see
    ``_diff_many``) whose result is one of the status constants below.
    """

    # The type or contents of the file were unsuitable for diffing
    CANNOT_DIFF = 'CANNOT_DIFF'
    # The file has changed in a semantic way
    CHANGED = 'CHANGED'
    # The file content may have changed, but there is no semantic change
    UNCHANGED = 'UNCHANGED'

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8'):
        """Constructor.

        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write comparison data to
        :param path_encoding: The character encoding to write paths in
        """
        self.old_tree, self.new_tree = old_tree, new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding

    def finish(self):
        """Hook that does nothing in the base class."""
        return None

    @classmethod
    def from_diff_tree(klass, diff_tree):
        """Build an instance from the trees, stream and encoding of ``diff_tree``."""
        return klass(diff_tree.old_tree, diff_tree.new_tree,
                     diff_tree.to_file, diff_tree.path_encoding)

    @staticmethod
    def _diff_many(differs, old_path, new_path, old_kind, new_kind):
        """Try each differ in turn; return the first result that is not
        CANNOT_DIFF, or CANNOT_DIFF when none of them can handle the pair.
        """
        for candidate in differs:
            outcome = candidate.diff(old_path, new_path, old_kind, new_kind)
            if outcome is DiffPath.CANNOT_DIFF:
                continue
            return outcome
        return DiffPath.CANNOT_DIFF
 
590
 
 
591
 
 
592
class DiffKindChange(object):
    """Special differ for file kind changes.

    A kind change is shown as a deletion of the old kind followed by a
    creation of the new kind, each delegated to the wrapped differs.
    """

    def __init__(self, differs):
        """:param differs: The differs to delegate both halves to."""
        self.differs = differs

    def finish(self):
        """Hook that does nothing for this differ."""
        return None

    @classmethod
    def from_diff_tree(klass, diff_tree):
        """Build an instance sharing the ``differs`` of ``diff_tree``."""
        return klass(diff_tree.differs)

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        :return: CANNOT_DIFF if either kind is None or the deletion half
            cannot be diffed; otherwise the result of the creation half.
        """
        if old_kind is None or new_kind is None:
            return DiffPath.CANNOT_DIFF
        # First half: old kind -> nothing (deletion).
        removal = DiffPath._diff_many(
            self.differs, old_path, new_path, old_kind, None)
        if removal is DiffPath.CANNOT_DIFF:
            return removal
        # Second half: nothing -> new kind (creation).
        return DiffPath._diff_many(
            self.differs, old_path, new_path, None, new_kind)
 
625
 
 
626
 
 
627
class DiffTreeReference(DiffPath):
    """Differ for tree references; writes no output."""

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two tree references.  (dummy)

        :return: CHANGED when at least one kind is 'tree-reference' and
            each kind is either 'tree-reference' or None; CANNOT_DIFF
            otherwise.
        """
        kinds = (old_kind, new_kind)
        if 'tree-reference' not in kinds:
            return self.CANNOT_DIFF
        for kind in kinds:
            if kind not in ('tree-reference', None):
                return self.CANNOT_DIFF
        return self.CHANGED
 
640
 
 
641
 
 
642
class DiffDirectory(DiffPath):
    """Differ for directories; writes no output."""

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two directories.  (dummy)

        :return: CHANGED when at least one kind is 'directory' and each
            kind is either 'directory' or None; CANNOT_DIFF otherwise.
        """
        kinds = (old_kind, new_kind)
        if 'directory' not in kinds:
            return self.CANNOT_DIFF
        for kind in kinds:
            if kind not in ('directory', None):
                return self.CANNOT_DIFF
        return self.CHANGED
 
655
 
 
656
 
 
657
class DiffSymlink(DiffPath):
    """Differ for symlinks; reports target changes as '===' lines."""

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Perform comparison between two symlinks

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        :return: CANNOT_DIFF unless at least one kind is 'symlink' and the
            other is 'symlink' or None; otherwise the result of
            diff_symlink().
        """
        if 'symlink' not in (old_kind, new_kind):
            return self.CANNOT_DIFF

        # The old side is resolved before the new side is even validated.
        if old_kind not in ('symlink', None):
            return self.CANNOT_DIFF
        old_target = None
        if old_kind == 'symlink':
            old_target = self.old_tree.get_symlink_target(old_path)

        if new_kind not in ('symlink', None):
            return self.CANNOT_DIFF
        new_target = None
        if new_kind == 'symlink':
            new_target = self.new_tree.get_symlink_target(new_path)

        return self.diff_symlink(old_target, new_target)

    def diff_symlink(self, old_target, new_target):
        """Write a one-line description of a symlink target change.

        :param old_target: Old target, or None if there was no symlink.
        :param new_target: New target, or None if there is no symlink now.
        :return: CHANGED, always.
        """
        def as_bytes(target):
            return target.encode(self.path_encoding, 'replace')

        if old_target is None:
            report = b'=== target is \'%s\'\n' % as_bytes(new_target)
        elif new_target is None:
            report = b'=== target was \'%s\'\n' % as_bytes(old_target)
        else:
            report = b'=== target changed \'%s\' => \'%s\'\n' % (
                as_bytes(old_target), as_bytes(new_target))
        self.to_file.write(report)
        return self.CHANGED
 
695
 
 
696
 
 
697
class DiffText(DiffPath):
    """Differ for regular files, delegating to a ``text_differ`` callable."""

    # GNU Patch uses the epoch date to detect files that are being added
    # or removed in a diff.
    EPOCH_DATE = '1970-01-01 00:00:00 +0000'

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 old_label='', new_label='', text_differ=internal_diff,
                 context_lines=DEFAULT_CONTEXT_AMOUNT):
        """Constructor.

        :param old_tree: The tree to show as the old tree in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: The file to write comparison data to
        :param path_encoding: The character encoding to write paths in
        :param old_label: Prefix put before old paths in labels
        :param new_label: Prefix put before new paths in labels
        :param text_differ: Callable producing the diff; called like
            internal_diff
        :param context_lines: Number of context lines handed to text_differ
        """
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
        self.old_label, self.new_label = old_label, new_label
        self.text_differ = text_differ
        self.path_encoding = path_encoding
        self.context_lines = context_lines

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Compare two files in unified diff format

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param old_kind: Old file-kind of the file
        :param new_kind: New file-kind of the file
        :return: CANNOT_DIFF unless at least one kind is 'file' and the
            other is 'file' or None; otherwise the result of diff_text().
        """
        if 'file' not in (old_kind, new_kind):
            return self.CANNOT_DIFF

        # A missing side gets the epoch date; the old side is dated before
        # the new side is validated.
        if old_kind not in ('file', None):
            return self.CANNOT_DIFF
        if old_kind is None:
            old_date = self.EPOCH_DATE
        else:
            old_date = _patch_header_date(self.old_tree, old_path)

        if new_kind not in ('file', None):
            return self.CANNOT_DIFF
        if new_kind is None:
            new_date = self.EPOCH_DATE
        else:
            new_date = _patch_header_date(self.new_tree, new_path)

        from_label = '%s%s\t%s' % (self.old_label, old_path, old_date)
        to_label = '%s%s\t%s' % (self.new_label, new_path, new_date)
        return self.diff_text(old_path, new_path, from_label, to_label)

    def diff_text(self, from_path, to_path, from_label, to_label):
        """Diff the content of given files in two trees

        :param from_path: The path in the from tree. If None,
            the file is not present in the from tree.
        :param to_path: The path in the to tree. This may refer
            to a different file from from_path.  If None,
            the file is not present in the to tree.
        :param from_label: Label handed to text_differ for the old side.
        :param to_label: Label handed to text_differ for the new side.
        :return: CHANGED, always.  If errors.BinaryFile is raised while
            reading or diffing, a "Binary files ... differ" line is
            written instead.
        """
        def read_lines(tree, path):
            # An absent path or a missing file both count as empty content.
            content = []
            if path is not None:
                try:
                    content = tree.get_file_lines(path)
                except errors.NoSuchFile:
                    pass
            return content

        try:
            from_text = read_lines(self.old_tree, from_path)
            to_text = read_lines(self.new_tree, to_path)
            self.text_differ(from_label, from_text, to_label, to_text,
                             self.to_file, path_encoding=self.path_encoding,
                             context_lines=self.context_lines)
        except errors.BinaryFile:
            notice = "Binary files %s%s and %s%s differ\n" % (
                self.old_label, from_path, self.new_label, to_path)
            self.to_file.write(notice.encode(self.path_encoding, 'replace'))
        return self.CHANGED
 
768
 
 
769
 
 
770
class DiffFromTool(DiffPath):
    """Diff a pair of files by running an external command on them.

    ``command_template`` is a list of strings; every element is expanded
    with ``str.format`` using the keys ``old_path`` and ``new_path`` to
    build the argument list that gets executed.  Files that cannot be
    handed to the tool in place are copied below a temporary directory,
    which ``finish`` removes again.
    """

    def __init__(self, command_template, old_tree, new_tree, to_file,
                 path_encoding='utf-8'):
        """Constructor

        :param command_template: List of argument templates for the tool
        :param old_tree: Tree the old versions of files are read from
        :param new_tree: Tree the new versions of files are read from
        :param to_file: File the tool's output is written to
        :param path_encoding: Character encoding passed on to DiffPath
        """
        DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding)
        self.command_template = command_template
        # Scratch directory for exported files; also used as the tool's cwd.
        self._root = osutils.mkdtemp(prefix='brz-diff-')

    @classmethod
    def from_string(klass, command_template, old_tree, new_tree, to_file,
                    path_encoding='utf-8'):
        """Create an instance; the template is passed on unchanged."""
        return klass(command_template, old_tree, new_tree, to_file,
                     path_encoding)

    @classmethod
    def make_from_diff_tree(klass, command_string, external_diff_options=None):
        """Return a factory that builds an instance from a DiffTree.

        :param command_string: Command template; it becomes the first
            element of the template list
        :param external_diff_options: Optional whitespace-separated options
            that are appended to the template as separate arguments
        :return: A callable taking a DiffTree and returning an instance
            bound to that DiffTree's trees and output file
        """
        def from_diff_tree(diff_tree):
            full_command = [command_string]
            if external_diff_options is not None:
                # The template is a list of arguments.  Using "+=" with a
                # string here would add one list element per character, so
                # split the options and append them as whole arguments.
                full_command.extend(external_diff_options.split())
            return klass.from_string(full_command, diff_tree.old_tree,
                                     diff_tree.new_tree, diff_tree.to_file)
        return from_diff_tree

    def _get_command(self, old_path, new_path):
        """Expand the command template into an argument list.

        :param old_path: Value substituted for ``{old_path}``
        :param new_path: Value substituted for ``{new_path}``
        :return: List of arguments; on win32 text arguments are encoded
            as 'mbcs'
        """
        my_map = {'old_path': old_path, 'new_path': new_path}
        command = [t.format(**my_map) for t in
                   self.command_template]
        if sys.platform == 'win32':  # Popen doesn't accept unicode on win32
            return [c.encode('mbcs') if isinstance(c, text_type) else c
                    for c in command]
        return command

    def _execute(self, old_path, new_path):
        """Run the tool on two paths and copy its stdout to self.to_file.

        :return: The exit status of the tool
        :raises errors.ExecutableMissing: if starting the command fails
            with ENOENT
        """
        command = self._get_command(old_path, new_path)
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    cwd=self._root)
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise errors.ExecutableMissing(command[0])
            else:
                raise
        try:
            self.to_file.write(proc.stdout.read())
        finally:
            # Always release the pipe and reap the child, even when writing
            # the output failed.
            proc.stdout.close()
            status = proc.wait()
        return status

    def _try_symlink_root(self, tree, prefix):
        """Try to expose the root of `tree` as a symlink in the scratch dir.

        :param tree: Tree whose root should be linked
        :param prefix: Name of the symlink inside the scratch directory
        :return: False if the tree has no ``abspath`` or
            ``osutils.host_os_dereferences_symlinks()`` is false; True
            otherwise (an already existing link counts as success)
        """
        if (getattr(tree, 'abspath', None) is None or
                not osutils.host_os_dereferences_symlinks()):
            return False
        try:
            os.symlink(tree.abspath(''), osutils.pathjoin(self._root, prefix))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        return True

    @staticmethod
    def _fenc():
        """Returns safe encoding for passing file path to diff tool"""
        if sys.platform == 'win32':
            return 'mbcs'
        else:
            # Don't fallback to 'utf-8' because subprocess may not be able to
            # handle utf-8 correctly when locale is not utf-8.
            return sys.getfilesystemencoding() or 'ascii'

    def _is_safepath(self, path):
        """Return true if `path` may be able to pass to subprocess.

        A path is considered safe when it survives an encode/decode round
        trip through the encoding returned by ``_fenc``.
        """
        fenc = self._fenc()
        try:
            return path == path.encode(fenc).decode(fenc)
        except UnicodeError:
            return False

    def _safe_filename(self, prefix, relpath):
        """Replace unsafe character in `relpath` then join `self._root`,
        `prefix` and `relpath`."""
        fenc = self._fenc()
        # encoded_str.replace('?', '_') may break multibyte char.
        # So we should encode, decode, then replace(u'?', u'_')
        relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace')
        relpath_tmp = relpath_tmp.replace(u'?', u'_')
        return osutils.pathjoin(self._root, prefix, relpath_tmp)

    def _write_file(self, relpath, tree, prefix, force_temp=False,
                    allow_write=False):
        """Return a path on disk from which the tool can read `relpath`.

        Unless `force_temp` is set, the file is used in place when `tree`
        is a WorkingTree and its absolute path is safe to pass to a
        subprocess, or reached through a symlink to the tree root when
        that can be set up.  Otherwise the content is copied into the
        scratch directory.

        :param relpath: Path of the file within `tree`
        :param tree: Tree to read the file from
        :param prefix: Sub-directory of the scratch directory to use
        :param force_temp: Always create a copy in the scratch directory
        :param allow_write: Leave the copy writable; by default the copy
            is made read-only
        :return: The path to hand to the tool
        """
        if not force_temp and isinstance(tree, WorkingTree):
            full_path = tree.abspath(relpath)
            if self._is_safepath(full_path):
                return full_path

        full_path = self._safe_filename(prefix, relpath)
        if not force_temp and self._try_symlink_root(tree, prefix):
            return full_path
        parent_dir = osutils.dirname(full_path)
        try:
            os.makedirs(parent_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        with tree.get_file(relpath) as source, \
                open(full_path, 'wb') as target:
            osutils.pumpfile(source, target)
        try:
            mtime = tree.get_file_mtime(relpath)
        except FileTimestampUnavailable:
            # No timestamp available: keep the mtime of the fresh copy.
            pass
        else:
            os.utime(full_path, (mtime, mtime))
        if not allow_write:
            osutils.make_readonly(full_path)
        return full_path

    def _prepare_files(self, old_path, new_path, force_temp=False,
                       allow_write_new=False):
        """Make both sides of a comparison available on disk.

        :param old_path: Path of the file in the old tree
        :param new_path: Path of the file in the new tree
        :param force_temp: Always copy into the scratch directory
        :param allow_write_new: Leave the copy of the new file writable
        :return: Tuple of (old_disk_path, new_disk_path)
        """
        old_disk_path = self._write_file(
            old_path, self.old_tree, 'old', force_temp)
        new_disk_path = self._write_file(
            new_path, self.new_tree, 'new', force_temp,
            allow_write=allow_write_new)
        return old_disk_path, new_disk_path

    def finish(self):
        """Remove the scratch directory.

        A failure other than ENOENT is logged with mutter, not raised.
        """
        try:
            osutils.rmtree(self._root)
        except OSError as e:
            if e.errno != errno.ENOENT:
                mutter("The temporary directory \"%s\" was not "
                       "cleanly removed: %s." % (self._root, e))

    def diff(self, old_path, new_path, old_kind, new_kind):
        """Run the tool on a file present as a regular file in both trees.

        :return: DiffPath.CANNOT_DIFF unless both kinds are 'file';
            otherwise None once the tool has run
        """
        if (old_kind, new_kind) != ('file', 'file'):
            return DiffPath.CANNOT_DIFF
        (old_disk_path, new_disk_path) = self._prepare_files(
            old_path, new_path)
        self._execute(old_disk_path, new_disk_path)

    def edit_file(self, old_path, new_path):
        """Use this tool to edit a file.

        A temporary copy will be edited, and the new contents will be
        returned.

        :return: The new contents of the file.
        """
        old_abs_path, new_abs_path = self._prepare_files(
            old_path, new_path, allow_write_new=True, force_temp=True)
        command = self._get_command(old_abs_path, new_abs_path)
        subprocess.call(command, cwd=self._root)
        with open(new_abs_path, 'rb') as new_file:
            return new_file.read()
 
929
 
 
930
 
 
931
class DiffTree(object):
    """Render the differences between two trees as text.

    For every file-id that was altered between the two trees a textual
    description of the change is produced.  The work is delegated to a
    sequence of DiffPath objects, each of which gets the chance to handle
    an altered file.  That sequence can be extended globally by appending
    to DiffTree.diff_factories, or for a single diff operation by passing
    the extra_factories option to the appropriate method.
    """

    # Factories producing DiffPath instances; plugins may append to it.
    diff_factories = [DiffSymlink.from_diff_tree,
                      DiffDirectory.from_diff_tree,
                      DiffTreeReference.from_diff_tree]

    def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8',
                 diff_text=None, extra_factories=None):
        """Constructor

        :param old_tree: Tree to show as old in the comparison
        :param new_tree: Tree to show as new in the comparison
        :param to_file: File to write the comparison to
        :param path_encoding: Character encoding to write paths in
        :param diff_text: DiffPath-type object to use as a last resort for
            diffing text files.
        :param extra_factories: Factories of DiffPaths to try before any
            other DiffPaths
        """
        if diff_text is None:
            diff_text = DiffText(old_tree, new_tree, to_file, path_encoding,
                                 '', '', internal_diff)
        self.old_tree, self.new_tree = old_tree, new_tree
        self.to_file = to_file
        self.path_encoding = path_encoding
        # Order matters: extra factories, then the class-wide factories,
        # then the text differ, and the kind-change differ last.
        self.differs = []
        if extra_factories is not None:
            for factory in extra_factories:
                self.differs.append(factory(self))
        for factory in self.diff_factories:
            self.differs.append(factory(self))
        kind_change = DiffKindChange.from_diff_tree(self)
        self.differs.append(diff_text)
        self.differs.append(kind_change)

    @classmethod
    def from_trees_options(klass, old_tree, new_tree, to_file,
                           path_encoding, external_diff_options, old_label,
                           new_label, using, context_lines):
        """Factory for producing a DiffTree.

        Designed to accept options used by show_diff_trees.

        :param old_tree: The tree to show as old in the comparison
        :param new_tree: The tree to show as new in the comparison
        :param to_file: File to write comparisons to
        :param path_encoding: Character encoding to use for writing paths
        :param external_diff_options: If supplied, use the installed diff
            binary to perform file comparison, using supplied options.
        :param old_label: Prefix to use for old file labels
        :param new_label: Prefix to use for new file labels
        :param using: Commandline to use to invoke an external diff tool
        :param context_lines: Context-lines setting forwarded to DiffText
        """
        extra_factories = []
        if using is not None:
            extra_factories.append(DiffFromTool.make_from_diff_tree(
                using, external_diff_options))
        if not external_diff_options:
            text_differ = internal_diff
        else:
            opts = external_diff_options.split()

            def text_differ(olab, olines, nlab, nlines, to_file,
                            path_encoding=None, context_lines=None):
                """:param path_encoding: not used but required
                        to match the signature of internal_diff.
                """
                external_diff(olab, olines, nlab, nlines, to_file, opts)
        last_resort = DiffText(old_tree, new_tree, to_file, path_encoding,
                               old_label, new_label, text_differ,
                               context_lines=context_lines)
        return klass(old_tree, new_tree, to_file, path_encoding, last_resort,
                     extra_factories)

    def show_diff(self, specific_files, extra_trees=None):
        """Write tree diff to self.to_file

        Every differ is finished afterwards, whether or not the diff
        succeeded.

        :param specific_files: the specific files to compare (recursive)
        :param extra_trees: extra trees to use for mapping paths to file_ids
        :return: The value returned by _show_diff
        """
        try:
            outcome = self._show_diff(specific_files, extra_trees)
        finally:
            for differ in self.differs:
                differ.finish()
        return outcome

    def _show_diff(self, specific_files, extra_trees):
        """Write a header, and a diff where content changed, per change.

        :return: 1 if any entry had its content changed or was renamed,
            0 otherwise
        """
        # TODO: Generation of pseudo-diffs for added/deleted files could
        # be usefully made into a much faster special case.
        def sort_key(change):
            # Sort on the new path, or on the old path if there is no new
            # one.
            from_path, to_path = change.path
            return from_path if to_path is None else to_path

        def encode_path(path):
            if path is None:
                return None
            return path.encode(self.path_encoding, "replace")

        changes = self.new_tree.iter_changes(
            self.old_tree, specific_files=specific_files,
            extra_trees=extra_trees, require_versioned=True)
        has_changes = 0
        for change in sorted(changes, key=sort_key):
            # The root does not get diffed, and items with no known kind (that
            # is, missing) in both trees are skipped as well.
            if change.parent_id == (None, None) or change.kind == (None, None):
                continue
            if (change.kind[0] == 'symlink'
                    and not self.new_tree.supports_symlinks()):
                warning(
                    'Ignoring "%s" as symlinks are not '
                    'supported on this filesystem.' % (change.path[0],))
                continue
            oldpath, newpath = change.path
            oldpath_encoded = encode_path(oldpath)
            newpath_encoded = encode_path(newpath)
            kind = change.kind
            presence = (kind[0] is not None and change.versioned[0],
                        kind[1] is not None and change.versioned[1])
            renamed = ((change.parent_id[0], change.name[0]) !=
                       (change.parent_id[1], change.name[1]))

            properties_changed = list(get_executable_change(
                change.executable[0], change.executable[1]))
            prop_str = b""
            if properties_changed:
                prop_str = b" (properties changed: %s)" % (
                    b", ".join(properties_changed),)

            if presence == (True, False):
                self.to_file.write(
                    b"=== removed %s '%s'\n" %
                    (kind[0].encode('ascii'), oldpath_encoded))
                newpath = oldpath
            elif presence == (False, True):
                self.to_file.write(
                    b"=== added %s '%s'\n" %
                    (kind[1].encode('ascii'), newpath_encoded))
                oldpath = newpath
            elif renamed:
                self.to_file.write(
                    b"=== renamed %s '%s' => '%s'%s\n" %
                    (kind[0].encode('ascii'), oldpath_encoded,
                     newpath_encoded, prop_str))
            else:
                # if it was produced by iter_changes, it must be
                # modified *somehow*, either content or execute bit.
                self.to_file.write(
                    b"=== modified %s '%s'%s\n" %
                    (kind[0].encode('ascii'), newpath_encoded, prop_str))
            if change.changed_content:
                self._diff(oldpath, newpath, kind[0], kind[1])
                has_changes = 1
            elif renamed:
                has_changes = 1
        return has_changes

    def diff(self, old_path, new_path):
        """Perform a diff of a single file

        The kind of each side is looked up in its tree; a path of None
        gives a kind of None.

        :param old_path: The path of the file in the old tree
        :param new_path: The path of the file in the new tree
        """
        old_kind = None if old_path is None else self.old_tree.kind(old_path)
        new_kind = None if new_path is None else self.new_tree.kind(new_path)
        self._diff(old_path, new_path, old_kind, new_kind)

    def _diff(self, old_path, new_path, old_kind, new_kind):
        """Offer one file to the differs.

        :raises errors.NoDiffFound: if DiffPath._diff_many reports
            CANNOT_DIFF; the error names the new path, or the old path
            when there is no new one
        """
        result = DiffPath._diff_many(
            self.differs, old_path, new_path, old_kind, new_kind)
        if result is not DiffPath.CANNOT_DIFF:
            return
        raise errors.NoDiffFound(old_path if new_path is None else new_path)
 
1119
 
 
1120
 
 
1121
# Module-level registry of diff formats, keyed by name.
format_registry = Registry()
 
1122
# DiffTree is the format registered under the name 'default'.
format_registry.register('default', DiffTree)