/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to brzlib/dirstate.py

  • Committer: Jelmer Vernooij
  • Date: 2017-05-21 12:41:27 UTC
  • mto: This revision was merged to the branch mainline in revision 6623.
  • Revision ID: jelmer@jelmer.uk-20170521124127-iv8etg0vwymyai6y
s/bzr/brz/ in apport config.

Show diffs side-by-side

added added

removed removed

Lines of Context:
28
28
    WHOLE_NUMBER = {digit}, digit;
29
29
    BOOLEAN = "y" | "n";
30
30
    REVISION_ID = a non-empty utf8 string;
31
 
 
 
31
    
32
32
    dirstate format = header line, full checksum, row count, parent details,
33
33
     ghost_details, entries;
34
34
    header line = "#bazaar dirstate flat format 3", NL;
78
78
 
79
79
and the entries in there are::
80
80
 
81
 
    entries[0][0]: b''
82
 
    entries[0][1]: b''
 
81
    entries[0][0]: ''
 
82
    entries[0][1]: ''
83
83
    entries[0][2]: file_id
84
84
    entries[1][0]: The tree data for the current tree for this fileid at /
85
85
    etc.
86
86
 
87
87
Kinds::
88
88
 
89
 
   b'r' is a relocated entry: This path is not present in this tree with this
 
89
    'r' is a relocated entry: This path is not present in this tree with this
90
90
        id, but the id can be found at another location. The fingerprint is
91
91
        used to point to the target location.
92
 
   b'a' is an absent entry: In that tree the id is not present at this path.
93
 
   b'd' is a directory entry: This path in this tree is a directory with the
 
92
    'a' is an absent entry: In that tree the id is not present at this path.
 
93
    'd' is a directory entry: This path in this tree is a directory with the
94
94
        current file id. There is no fingerprint for directories.
95
 
   b'f' is a file entry: As for directory, but it's a file. The fingerprint is
 
95
    'f' is a file entry: As for directory, but it's a file. The fingerprint is
96
96
        the sha1 value of the file's canonical form, i.e. after any read
97
97
        filters have been applied to the convenience form stored in the working
98
98
        tree.
99
 
   b'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
 
99
    'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
100
100
        the link target.
101
 
   b't' is a reference to a nested subtree; the fingerprint is the referenced
 
101
    't' is a reference to a nested subtree; the fingerprint is the referenced
102
102
        revision.
103
103
 
104
104
Ordering:
166
166
 
167
167
 vector of all directories, and vector of the children ?
168
168
   i.e.
169
 
     root_entries = (direntry for root, [parent_direntries_for_root]),
 
169
     root_entrie = (direntry for root, [parent_direntries_for_root]),
170
170
     dirblocks = [
171
171
     ('', ['data for achild', 'data for bchild', 'data for cchild'])
172
172
     ('dir', ['achild', 'cchild', 'echild'])
218
218
 
219
219
"""
220
220
 
 
221
from __future__ import absolute_import
 
222
 
221
223
import bisect
222
 
import contextlib
223
224
import errno
224
225
import operator
225
226
import os
229
230
import time
230
231
import zlib
231
232
 
232
 
from . import (
233
 
    inventory,
234
 
    )
235
 
from .. import (
 
233
from brzlib import (
236
234
    cache_utf8,
237
235
    config,
238
236
    debug,
239
237
    errors,
 
238
    inventory,
240
239
    lock,
241
240
    osutils,
242
241
    static_tuple,
243
242
    trace,
244
243
    urlutils,
245
244
    )
246
 
from ..tree import TreeChange
247
245
 
248
246
 
249
247
# This is the Windows equivalent of ENOTDIR
253
251
ERROR_DIRECTORY = 267
254
252
 
255
253
 
256
 
class DirstateCorrupt(errors.BzrError):
257
 
 
258
 
    _fmt = "The dirstate file (%(state)s) appears to be corrupt: %(msg)s"
259
 
 
260
 
    def __init__(self, state, msg):
261
 
        errors.BzrError.__init__(self)
262
 
        self.state = state
263
 
        self.msg = msg
264
 
 
265
 
 
266
254
class SHA1Provider(object):
267
255
    """An interface for getting sha1s of a file."""
268
256
 
276
264
 
277
265
    def stat_and_sha1(self, abspath):
278
266
        """Return the stat and sha1 of a file given its absolute path.
279
 
 
 
267
        
280
268
        :param abspath:  May be a filesystem encoded absolute path
281
269
             or a unicode path.
282
270
 
295
283
 
296
284
    def stat_and_sha1(self, abspath):
297
285
        """Return the stat and sha1 of a file given its absolute path."""
298
 
        with open(abspath, 'rb') as file_obj:
 
286
        file_obj = file(abspath, 'rb')
 
287
        try:
299
288
            statvalue = os.fstat(file_obj.fileno())
300
289
            sha1 = osutils.sha_file(file_obj)
 
290
        finally:
 
291
            file_obj.close()
301
292
        return statvalue, sha1
302
293
 
303
294
 
318
309
    """
319
310
 
320
311
    _kind_to_minikind = {
321
 
        'absent': b'a',
322
 
        'file': b'f',
323
 
        'directory': b'd',
324
 
        'relocated': b'r',
325
 
        'symlink': b'l',
326
 
        'tree-reference': b't',
 
312
            'absent': 'a',
 
313
            'file': 'f',
 
314
            'directory': 'd',
 
315
            'relocated': 'r',
 
316
            'symlink': 'l',
 
317
            'tree-reference': 't',
327
318
        }
328
319
    _minikind_to_kind = {
329
 
        b'a': 'absent',
330
 
        b'f': 'file',
331
 
        b'd': 'directory',
332
 
        b'l': 'symlink',
333
 
        b'r': 'relocated',
334
 
        b't': 'tree-reference',
 
320
            'a': 'absent',
 
321
            'f': 'file',
 
322
            'd': 'directory',
 
323
            'l':'symlink',
 
324
            'r': 'relocated',
 
325
            't': 'tree-reference',
335
326
        }
336
327
    _stat_to_minikind = {
337
 
        stat.S_IFDIR: b'd',
338
 
        stat.S_IFREG: b'f',
339
 
        stat.S_IFLNK: b'l',
 
328
        stat.S_IFDIR:'d',
 
329
        stat.S_IFREG:'f',
 
330
        stat.S_IFLNK:'l',
340
331
    }
341
 
    _to_yesno = {True: b'y', False: b'n'}  # TODO profile the performance gain
342
 
    # of using int conversion rather than a dict here. AND BLAME ANDREW IF
343
 
    # it is faster.
 
332
    _to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain
 
333
     # of using int conversion rather than a dict here. AND BLAME ANDREW IF
 
334
     # it is faster.
344
335
 
345
336
    # TODO: jam 20070221 Figure out what to do if we have a record that exceeds
346
337
    #       the BISECT_PAGE_SIZE. For now, we just have to make it large enough
350
341
    NOT_IN_MEMORY = 0
351
342
    IN_MEMORY_UNMODIFIED = 1
352
343
    IN_MEMORY_MODIFIED = 2
353
 
    IN_MEMORY_HASH_MODIFIED = 3  # Only hash-cache updates
 
344
    IN_MEMORY_HASH_MODIFIED = 3 # Only hash-cache updates
354
345
 
355
346
    # A pack_stat (the x's) that is just noise and will never match the output
356
347
    # of base64 encode.
357
 
    NULLSTAT = b'x' * 32
358
 
    NULL_PARENT_DETAILS = static_tuple.StaticTuple(b'a', b'', 0, False, b'')
359
 
 
360
 
    HEADER_FORMAT_2 = b'#bazaar dirstate flat format 2\n'
361
 
    HEADER_FORMAT_3 = b'#bazaar dirstate flat format 3\n'
362
 
 
363
 
    def __init__(self, path, sha1_provider, worth_saving_limit=0,
364
 
                 use_filesystem_for_exec=True):
 
348
    NULLSTAT = 'x' * 32
 
349
    NULL_PARENT_DETAILS = static_tuple.StaticTuple('a', '', 0, False, '')
 
350
 
 
351
    HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
 
352
    HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
 
353
 
 
354
    def __init__(self, path, sha1_provider, worth_saving_limit=0):
365
355
        """Create a  DirState object.
366
356
 
367
357
        :param path: The path at which the dirstate file on disk should live.
370
360
            entries is known, only bother saving the dirstate if more than
371
361
            this count of entries have changed.
372
362
            -1 means never save hash changes, 0 means always save hash changes.
373
 
        :param use_filesystem_for_exec: Whether to trust the filesystem
374
 
            for executable bit information
375
363
        """
376
364
        # _header_state and _dirblock_state represent the current state
377
365
        # of the dirstate metadata and the per-row data respectively.
420
408
        self._worth_saving_limit = worth_saving_limit
421
409
        self._config_stack = config.LocationStack(urlutils.local_path_to_url(
422
410
            path))
423
 
        self._use_filesystem_for_exec = use_filesystem_for_exec
424
411
 
425
412
    def __repr__(self):
426
413
        return "%s(%r)" % \
436
423
        """
437
424
        #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
438
425
        if hash_changed_entries:
439
 
            self._known_hash_changes.update(
440
 
                [e[0] for e in hash_changed_entries])
 
426
            self._known_hash_changes.update([e[0] for e in hash_changed_entries])
441
427
            if self._dirblock_state in (DirState.NOT_IN_MEMORY,
442
428
                                        DirState.IN_MEMORY_UNMODIFIED):
443
429
                # If the dirstate is already marked as IN_MEMORY_MODIFIED, then
461
447
    def add(self, path, file_id, kind, stat, fingerprint):
462
448
        """Add a path to be tracked.
463
449
 
464
 
        :param path: The path within the dirstate - b'' is the root, 'foo' is the
 
450
        :param path: The path within the dirstate - '' is the root, 'foo' is the
465
451
            path foo within the root, 'foo/bar' is the path bar within foo
466
452
            within the root.
467
453
        :param file_id: The file id of the path being added.
472
458
            after any read filters have been applied),
473
459
            or the target of a symlink,
474
460
            or the referenced revision id for tree-references,
475
 
            or b'' for directories.
 
461
            or '' for directories.
476
462
        """
477
463
        # adding a file:
478
464
        # find the block it's in.
479
465
        # find the location in the block.
480
466
        # check it's not there
481
467
        # add it.
482
 
        # ------- copied from inventory.ensure_normalized_name - keep synced.
 
468
        #------- copied from inventory.ensure_normalized_name - keep synced.
483
469
        # --- normalized_filename wants a unicode basename only, so get one.
484
470
        dirname, basename = osutils.split(path)
485
471
        # we dont import normalized_filename directly because we want to be
493
479
        # you should never have files called . or ..; just add the directory
494
480
        # in the parent, or according to the special treatment for the root
495
481
        if basename == '.' or basename == '..':
496
 
            raise inventory.InvalidEntryName(path)
 
482
            raise errors.InvalidEntryName(path)
497
483
        # now that we've normalised, we need the correct utf8 path and
498
484
        # dirname and basename elements. This single encode and split should be
499
485
        # faster than three separate encodes.
500
486
        utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
501
487
        dirname, basename = osutils.split(utf8path)
502
488
        # uses __class__ for speed; the check is needed for safety
503
 
        if file_id.__class__ is not bytes:
 
489
        if file_id.__class__ is not str:
504
490
            raise AssertionError(
505
491
                "must be a utf8 file_id not %s" % (type(file_id), ))
506
492
        # Make sure the file_id does not exist in this tree
507
493
        rename_from = None
508
 
        file_id_entry = self._get_entry(
509
 
            0, fileid_utf8=file_id, include_deleted=True)
 
494
        file_id_entry = self._get_entry(0, fileid_utf8=file_id, include_deleted=True)
510
495
        if file_id_entry != (None, None):
511
 
            if file_id_entry[1][0][0] == b'a':
 
496
            if file_id_entry[1][0][0] == 'a':
512
497
                if file_id_entry[0] != (dirname, basename, file_id):
513
498
                    # set the old name's current operation to rename
514
499
                    self.update_minimal(file_id_entry[0],
515
 
                                        b'r',
516
 
                                        path_utf8=b'',
517
 
                                        packed_stat=b'',
518
 
                                        fingerprint=utf8path
519
 
                                        )
 
500
                        'r',
 
501
                        path_utf8='',
 
502
                        packed_stat='',
 
503
                        fingerprint=utf8path
 
504
                    )
520
505
                    rename_from = file_id_entry[0][0:2]
521
506
            else:
522
 
                path = osutils.pathjoin(
523
 
                    file_id_entry[0][0], file_id_entry[0][1])
 
507
                path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])
524
508
                kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]
525
509
                info = '%s:%s' % (kind, path)
526
510
                raise errors.DuplicateFileId(file_id, info)
527
 
        first_key = (dirname, basename, b'')
 
511
        first_key = (dirname, basename, '')
528
512
        block_index, present = self._find_block_index_from_key(first_key)
529
513
        if present:
530
514
            # check the path is not in the tree
531
515
            block = self._dirblocks[block_index][1]
532
516
            entry_index, _ = self._find_entry_index(first_key, block)
533
517
            while (entry_index < len(block) and
534
 
                   block[entry_index][0][0:2] == first_key[0:2]):
535
 
                if block[entry_index][1][0][0] not in (b'a', b'r'):
 
518
                block[entry_index][0][0:2] == first_key[0:2]):
 
519
                if block[entry_index][1][0][0] not in 'ar':
536
520
                    # this path is in the dirstate in the current tree.
537
 
                    raise Exception("adding already added path!")
 
521
                    raise Exception, "adding already added path!"
538
522
                entry_index += 1
539
523
        else:
540
524
            # The block where we want to put the file is not present. But it
558
542
        minikind = DirState._kind_to_minikind[kind]
559
543
        if rename_from is not None:
560
544
            if rename_from[0]:
561
 
                old_path_utf8 = b'%s/%s' % rename_from
 
545
                old_path_utf8 = '%s/%s' % rename_from
562
546
            else:
563
547
                old_path_utf8 = rename_from[1]
564
 
            parent_info[0] = (b'r', old_path_utf8, 0, False, b'')
 
548
            parent_info[0] = ('r', old_path_utf8, 0, False, '')
565
549
        if kind == 'file':
566
550
            entry_data = entry_key, [
567
551
                (minikind, fingerprint, size, False, packed_stat),
568
552
                ] + parent_info
569
553
        elif kind == 'directory':
570
554
            entry_data = entry_key, [
571
 
                (minikind, b'', 0, False, packed_stat),
 
555
                (minikind, '', 0, False, packed_stat),
572
556
                ] + parent_info
573
557
        elif kind == 'symlink':
574
558
            entry_data = entry_key, [
584
568
        if not present:
585
569
            block.insert(entry_index, entry_data)
586
570
        else:
587
 
            if block[entry_index][1][0][0] != b'a':
588
 
                raise AssertionError(" %r(%r) already added" %
589
 
                                     (basename, file_id))
 
571
            if block[entry_index][1][0][0] != 'a':
 
572
                raise AssertionError(" %r(%r) already added" % (basename, file_id))
590
573
            block[entry_index][1][0] = entry_data[1][0]
591
574
 
592
575
        if kind == 'directory':
593
 
            # insert a new dirblock
594
 
            self._ensure_block(block_index, entry_index, utf8path)
 
576
           # insert a new dirblock
 
577
           self._ensure_block(block_index, entry_index, utf8path)
595
578
        self._mark_modified()
596
579
        if self._id_index:
597
580
            self._add_to_id_index(self._id_index, entry_key)
612
595
        # there, this function is only meant to handle when we want to read
613
596
        # part of the disk.
614
597
        if self._dirblock_state != DirState.NOT_IN_MEMORY:
615
 
            raise AssertionError("bad dirblock state %r" %
616
 
                                 self._dirblock_state)
 
598
            raise AssertionError("bad dirblock state %r" % self._dirblock_state)
617
599
 
618
600
        # The disk representation is generally info + '\0\n\0' at the end. But
619
601
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
622
604
        file_size = os.fstat(state_file.fileno()).st_size
623
605
        # We end up with 2 extra fields, we should have a trailing '\n' to
624
606
        # ensure that we read the whole record, and we should have a precursor
625
 
        # b'' which ensures that we start after the previous '\n'
 
607
        # '' which ensures that we start after the previous '\n'
626
608
        entry_field_count = self._fields_per_entry() + 1
627
609
 
628
610
        low = self._end_of_header
629
 
        high = file_size - 1  # Ignore the final '\0'
 
611
        high = file_size - 1 # Ignore the final '\0'
630
612
        # Map from (dir, name) => entry
631
613
        found = {}
632
614
 
633
615
        # Avoid infinite seeking
634
 
        max_count = 30 * len(paths)
 
616
        max_count = 30*len(paths)
635
617
        count = 0
636
618
        # pending is a list of places to look.
637
619
        # each entry is a tuple of low, high, dir_names
656
638
            if count > max_count:
657
639
                raise errors.BzrError('Too many seeks, most likely a bug.')
658
640
 
659
 
            mid = max(low, (low + high - page_size) // 2)
 
641
            mid = max(low, (low+high-page_size)/2)
660
642
 
661
643
            state_file.seek(mid)
662
644
            # limit the read size, so we don't end up reading data that we have
663
645
            # already read.
664
 
            read_size = min(page_size, (high - mid) + 1)
 
646
            read_size = min(page_size, (high-mid)+1)
665
647
            block = state_file.read(read_size)
666
648
 
667
649
            start = mid
668
 
            entries = block.split(b'\n')
 
650
            entries = block.split('\n')
669
651
 
670
652
            if len(entries) < 2:
671
653
                # We didn't find a '\n', so we cannot have found any records.
679
661
            # Check the first and last entries, in case they are partial, or if
680
662
            # we don't care about the rest of this page
681
663
            first_entry_num = 0
682
 
            first_fields = entries[0].split(b'\0')
 
664
            first_fields = entries[0].split('\0')
683
665
            if len(first_fields) < entry_field_count:
684
666
                # We didn't get the complete first entry
685
667
                # so move start, and grab the next, which
686
668
                # should be a full entry
687
 
                start += len(entries[0]) + 1
688
 
                first_fields = entries[1].split(b'\0')
 
669
                start += len(entries[0])+1
 
670
                first_fields = entries[1].split('\0')
689
671
                first_entry_num = 1
690
672
 
691
673
            if len(first_fields) <= 2:
699
681
                # after this first record.
700
682
                after = start
701
683
                if first_fields[1]:
702
 
                    first_path = first_fields[1] + b'/' + first_fields[2]
 
684
                    first_path = first_fields[1] + '/' + first_fields[2]
703
685
                else:
704
686
                    first_path = first_fields[2]
705
687
                first_loc = _bisect_path_left(cur_files, first_path)
714
696
                # We have files after the first entry
715
697
 
716
698
                # Parse the last entry
717
 
                last_entry_num = len(entries) - 1
718
 
                last_fields = entries[last_entry_num].split(b'\0')
 
699
                last_entry_num = len(entries)-1
 
700
                last_fields = entries[last_entry_num].split('\0')
719
701
                if len(last_fields) < entry_field_count:
720
702
                    # The very last hunk was not complete,
721
703
                    # read the previous hunk
722
704
                    after = mid + len(block) - len(entries[-1])
723
705
                    last_entry_num -= 1
724
 
                    last_fields = entries[last_entry_num].split(b'\0')
 
706
                    last_fields = entries[last_entry_num].split('\0')
725
707
                else:
726
708
                    after = mid + len(block)
727
709
 
728
710
                if last_fields[1]:
729
 
                    last_path = last_fields[1] + b'/' + last_fields[2]
 
711
                    last_path = last_fields[1] + '/' + last_fields[2]
730
712
                else:
731
713
                    last_path = last_fields[2]
732
714
                last_loc = _bisect_path_right(post, last_path)
747
729
                        post.insert(0, last_path)
748
730
 
749
731
                    # Find out what paths we have
750
 
                    paths = {first_path: [first_fields]}
 
732
                    paths = {first_path:[first_fields]}
751
733
                    # last_path might == first_path so we need to be
752
734
                    # careful if we should append rather than overwrite
753
735
                    if last_entry_num != first_entry_num:
754
736
                        paths.setdefault(last_path, []).append(last_fields)
755
 
                    for num in range(first_entry_num + 1, last_entry_num):
 
737
                    for num in xrange(first_entry_num+1, last_entry_num):
756
738
                        # TODO: jam 20070223 We are already splitting here, so
757
739
                        #       shouldn't we just split the whole thing rather
758
740
                        #       than doing the split again in add_one_record?
759
 
                        fields = entries[num].split(b'\0')
 
741
                        fields = entries[num].split('\0')
760
742
                        if fields[1]:
761
 
                            path = fields[1] + b'/' + fields[2]
 
743
                            path = fields[1] + '/' + fields[2]
762
744
                        else:
763
745
                            path = fields[2]
764
746
                        paths.setdefault(path, []).append(fields)
779
761
            if post:
780
762
                pending.append((after, high, post))
781
763
            if pre:
782
 
                pending.append((low, start - 1, pre))
 
764
                pending.append((low, start-1, pre))
783
765
 
784
766
        # Consider that we may want to return the directory entries in sorted
785
767
        # order. For now, we just return them in whatever order we found them,
806
788
        # there, this function is only meant to handle when we want to read
807
789
        # part of the disk.
808
790
        if self._dirblock_state != DirState.NOT_IN_MEMORY:
809
 
            raise AssertionError("bad dirblock state %r" %
810
 
                                 self._dirblock_state)
 
791
            raise AssertionError("bad dirblock state %r" % self._dirblock_state)
811
792
        # The disk representation is generally info + '\0\n\0' at the end. But
812
793
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
813
794
        # Because it means we can sync on the '\n'
815
796
        file_size = os.fstat(state_file.fileno()).st_size
816
797
        # We end up with 2 extra fields, we should have a trailing '\n' to
817
798
        # ensure that we read the whole record, and we should have a precursor
818
 
        # b'' which ensures that we start after the previous '\n'
 
799
        # '' which ensures that we start after the previous '\n'
819
800
        entry_field_count = self._fields_per_entry() + 1
820
801
 
821
802
        low = self._end_of_header
822
 
        high = file_size - 1  # Ignore the final '\0'
 
803
        high = file_size - 1 # Ignore the final '\0'
823
804
        # Map from dir => entry
824
805
        found = {}
825
806
 
826
807
        # Avoid infinite seeking
827
 
        max_count = 30 * len(dir_list)
 
808
        max_count = 30*len(dir_list)
828
809
        count = 0
829
810
        # pending is a list of places to look.
830
811
        # each entry is a tuple of low, high, dir_names
849
830
            if count > max_count:
850
831
                raise errors.BzrError('Too many seeks, most likely a bug.')
851
832
 
852
 
            mid = max(low, (low + high - page_size) // 2)
 
833
            mid = max(low, (low+high-page_size)/2)
853
834
 
854
835
            state_file.seek(mid)
855
836
            # limit the read size, so we don't end up reading data that we have
856
837
            # already read.
857
 
            read_size = min(page_size, (high - mid) + 1)
 
838
            read_size = min(page_size, (high-mid)+1)
858
839
            block = state_file.read(read_size)
859
840
 
860
841
            start = mid
861
 
            entries = block.split(b'\n')
 
842
            entries = block.split('\n')
862
843
 
863
844
            if len(entries) < 2:
864
845
                # We didn't find a '\n', so we cannot have found any records.
872
853
            # Check the first and last entries, in case they are partial, or if
873
854
            # we don't care about the rest of this page
874
855
            first_entry_num = 0
875
 
            first_fields = entries[0].split(b'\0')
 
856
            first_fields = entries[0].split('\0')
876
857
            if len(first_fields) < entry_field_count:
877
858
                # We didn't get the complete first entry
878
859
                # so move start, and grab the next, which
879
860
                # should be a full entry
880
 
                start += len(entries[0]) + 1
881
 
                first_fields = entries[1].split(b'\0')
 
861
                start += len(entries[0])+1
 
862
                first_fields = entries[1].split('\0')
882
863
                first_entry_num = 1
883
864
 
884
865
            if len(first_fields) <= 1:
904
885
                # We have records to look at after the first entry
905
886
 
906
887
                # Parse the last entry
907
 
                last_entry_num = len(entries) - 1
908
 
                last_fields = entries[last_entry_num].split(b'\0')
 
888
                last_entry_num = len(entries)-1
 
889
                last_fields = entries[last_entry_num].split('\0')
909
890
                if len(last_fields) < entry_field_count:
910
891
                    # The very last hunk was not complete,
911
892
                    # read the previous hunk
912
893
                    after = mid + len(block) - len(entries[-1])
913
894
                    last_entry_num -= 1
914
 
                    last_fields = entries[last_entry_num].split(b'\0')
 
895
                    last_fields = entries[last_entry_num].split('\0')
915
896
                else:
916
897
                    after = mid + len(block)
917
898
 
934
915
                        post.insert(0, last_dir)
935
916
 
936
917
                    # Find out what paths we have
937
 
                    paths = {first_dir: [first_fields]}
 
918
                    paths = {first_dir:[first_fields]}
938
919
                    # last_dir might == first_dir so we need to be
939
920
                    # careful if we should append rather than overwrite
940
921
                    if last_entry_num != first_entry_num:
941
922
                        paths.setdefault(last_dir, []).append(last_fields)
942
 
                    for num in range(first_entry_num + 1, last_entry_num):
 
923
                    for num in xrange(first_entry_num+1, last_entry_num):
943
924
                        # TODO: jam 20070223 We are already splitting here, so
944
925
                        #       shouldn't we just split the whole thing rather
945
926
                        #       than doing the split again in add_one_record?
946
 
                        fields = entries[num].split(b'\0')
 
927
                        fields = entries[num].split('\0')
947
928
                        paths.setdefault(fields[1], []).append(fields)
948
929
 
949
930
                    for cur_dir in middle_files:
962
943
            if post:
963
944
                pending.append((after, high, post))
964
945
            if pre:
965
 
                pending.append((low, start - 1, pre))
 
946
                pending.append((low, start-1, pre))
966
947
 
967
948
        return found
968
949
 
974
955
        directories. (and renames?)
975
956
 
976
957
        :param paths: A sorted list of (dir, name) pairs
977
 
             eg: [('', b'a'), ('', b'f'), ('a/b', b'c')]
 
958
             eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]
978
959
        :return: A dictionary mapping (dir, name, file_id) => [tree_info]
979
960
        """
980
961
        # Map from (dir, name, file_id) => [tree_info]
991
972
            # Directories that need to be read
992
973
            pending_dirs = set()
993
974
            paths_to_search = set()
994
 
            for entry_list in newly_found.values():
 
975
            for entry_list in newly_found.itervalues():
995
976
                for dir_name_id, trees_info in entry_list:
996
977
                    found[dir_name_id] = trees_info
997
978
                    found_dir_names.add(dir_name_id[:2])
998
979
                    is_dir = False
999
980
                    for tree_info in trees_info:
1000
981
                        minikind = tree_info[0]
1001
 
                        if minikind == b'd':
 
982
                        if minikind == 'd':
1002
983
                            if is_dir:
1003
984
                                # We already processed this one as a directory,
1004
985
                                # we don't need to do the extra work again.
1008
989
                            is_dir = True
1009
990
                            if path not in processed_dirs:
1010
991
                                pending_dirs.add(path)
1011
 
                        elif minikind == b'r':
 
992
                        elif minikind == 'r':
1012
993
                            # Rename, we need to directly search the target
1013
994
                            # which is contained in the fingerprint column
1014
995
                            dir_name = osutils.split(tree_info[1])
1041
1022
            return
1042
1023
        # only require all dirblocks if we are doing a full-pass removal.
1043
1024
        self._read_dirblocks_if_needed()
1044
 
        dead_patterns = {(b'a', b'r'), (b'a', b'a'),
1045
 
                         (b'r', b'r'), (b'r', b'a')}
1046
 
 
 
1025
        dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])
1047
1026
        def iter_entries_removable():
1048
1027
            for block in self._dirblocks:
1049
1028
                deleted_positions = []
1072
1051
 
1073
1052
    def _empty_parent_info(self):
1074
1053
        return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
1075
 
                                                 len(self._ghosts))
 
1054
                                                    len(self._ghosts))
1076
1055
 
1077
1056
    def _ensure_block(self, parent_block_index, parent_row_index, dirname):
1078
1057
        """Ensure a block for dirname exists.
1094
1073
        :param dirname: The utf8 dirname to ensure there is a block for.
1095
1074
        :return: The index for the block.
1096
1075
        """
1097
 
        if dirname == b'' and parent_row_index == 0 and parent_block_index == 0:
 
1076
        if dirname == '' and parent_row_index == 0 and parent_block_index == 0:
1098
1077
            # This is the signature of the root row, and the
1099
1078
            # contents-of-root row is always index 1
1100
1079
            return 1
1101
1080
        # the basename of the directory must be the end of its full name.
1102
1081
        if not (parent_block_index == -1 and
1103
 
                parent_block_index == -1 and dirname == b''):
 
1082
            parent_block_index == -1 and dirname == ''):
1104
1083
            if not dirname.endswith(
1105
1084
                    self._dirblocks[parent_block_index][1][parent_row_index][0][1]):
1106
1085
                raise AssertionError("bad dirname %r" % dirname)
1107
 
        block_index, present = self._find_block_index_from_key(
1108
 
            (dirname, b'', b''))
 
1086
        block_index, present = self._find_block_index_from_key((dirname, '', ''))
1109
1087
        if not present:
1110
 
            # In future, when doing partial parsing, this should load and
 
1088
            ## In future, when doing partial parsing, this should load and
1111
1089
            # populate the entire block.
1112
1090
            self._dirblocks.insert(block_index, (dirname, []))
1113
1091
        return block_index
1122
1100
            to prevent unneeded overhead when callers have a sorted list already.
1123
1101
        :return: Nothing.
1124
1102
        """
1125
 
        if new_entries[0][0][0:2] != (b'', b''):
 
1103
        if new_entries[0][0][0:2] != ('', ''):
1126
1104
            raise AssertionError(
1127
1105
                "Missing root row %r" % (new_entries[0][0],))
1128
1106
        # The two blocks here are deliberate: the root block and the
1129
1107
        # contents-of-root block.
1130
 
        self._dirblocks = [(b'', []), (b'', [])]
 
1108
        self._dirblocks = [('', []), ('', [])]
1131
1109
        current_block = self._dirblocks[0][1]
1132
 
        current_dirname = b''
1133
 
        root_key = (b'', b'')
 
1110
        current_dirname = ''
 
1111
        root_key = ('', '')
1134
1112
        append_entry = current_block.append
1135
1113
        for entry in new_entries:
1136
1114
            if entry[0][0] != current_dirname:
1152
1130
        # The above loop leaves the "root block" entries mixed with the
1153
1131
        # "contents-of-root block". But we don't want an if check on
1154
1132
        # all entries, so instead we just fix it up here.
1155
 
        if self._dirblocks[1] != (b'', []):
 
1133
        if self._dirblocks[1] != ('', []):
1156
1134
            raise ValueError("bad dirblock start %r" % (self._dirblocks[1],))
1157
1135
        root_block = []
1158
1136
        contents_of_root_block = []
1159
1137
        for entry in self._dirblocks[0][1]:
1160
 
            if not entry[0][1]:  # This is a root entry
 
1138
            if not entry[0][1]: # This is a root entry
1161
1139
                root_block.append(entry)
1162
1140
            else:
1163
1141
                contents_of_root_block.append(entry)
1164
 
        self._dirblocks[0] = (b'', root_block)
1165
 
        self._dirblocks[1] = (b'', contents_of_root_block)
 
1142
        self._dirblocks[0] = ('', root_block)
 
1143
        self._dirblocks[1] = ('', contents_of_root_block)
1166
1144
 
1167
1145
    def _entries_for_path(self, path):
1168
1146
        """Return a list with all the entries that match path for all ids."""
1169
1147
        dirname, basename = os.path.split(path)
1170
 
        key = (dirname, basename, b'')
 
1148
        key = (dirname, basename, '')
1171
1149
        block_index, present = self._find_block_index_from_key(key)
1172
1150
        if not present:
1173
1151
            # the block which should contain path is absent.
1177
1155
        entry_index, _ = self._find_entry_index(key, block)
1178
1156
        # we may need to look at multiple entries at this path: walk while the specific_files match.
1179
1157
        while (entry_index < len(block) and
1180
 
               block[entry_index][0][0:2] == key[0:2]):
 
1158
            block[entry_index][0][0:2] == key[0:2]):
1181
1159
            result.append(block[entry_index])
1182
1160
            entry_index += 1
1183
1161
        return result
1196
1174
            # minikind
1197
1175
            entire_entry[tree_offset + 0] = tree_data[0]
1198
1176
            # size
1199
 
            entire_entry[tree_offset + 2] = b'%d' % tree_data[2]
 
1177
            entire_entry[tree_offset + 2] = str(tree_data[2])
1200
1178
            # executable
1201
1179
            entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]
1202
 
        return b'\0'.join(entire_entry)
 
1180
        return '\0'.join(entire_entry)
1203
1181
 
1204
1182
    def _fields_per_entry(self):
1205
1183
        """How many null separated fields should be in each entry row.
1242
1220
 
1243
1221
        :return: The block index, True if the block for the key is present.
1244
1222
        """
1245
 
        if key[0:2] == (b'', b''):
 
1223
        if key[0:2] == ('', ''):
1246
1224
            return 0, True
1247
1225
        try:
1248
1226
            if (self._last_block_index is not None and
1249
 
                    self._dirblocks[self._last_block_index][0] == key[0]):
 
1227
                self._dirblocks[self._last_block_index][0] == key[0]):
1250
1228
                return self._last_block_index, True
1251
1229
        except IndexError:
1252
1230
            pass
1254
1232
                                      cache=self._split_path_cache)
1255
1233
        # _right returns one-past-where-key is so we have to subtract
1256
1234
        # one to use it. we use _right here because there are two
1257
 
        # b'' blocks - the root, and the contents of root
 
1235
        # '' blocks - the root, and the contents of root
1258
1236
        # we always have a minimum of 2 in self._dirblocks: root and
1259
 
        # root-contents, and for b'', we get 2 back, so this is
 
1237
        # root-contents, and for '', we get 2 back, so this is
1260
1238
        # simple and correct:
1261
1239
        present = (block_index < len(self._dirblocks) and
1262
 
                   self._dirblocks[block_index][0] == key[0])
 
1240
            self._dirblocks[block_index][0] == key[0])
1263
1241
        self._last_block_index = block_index
1264
1242
        # Reset the entry index cache to the beginning of the block.
1265
1243
        self._last_entry_index = -1
1278
1256
                # A hit is when the key is after the last slot, and before or
1279
1257
                # equal to the next slot.
1280
1258
                if ((entry_index > 0 and block[entry_index - 1][0] < key) and
1281
 
                        key <= block[entry_index][0]):
 
1259
                    key <= block[entry_index][0]):
1282
1260
                    self._last_entry_index = entry_index
1283
1261
                    present = (block[entry_index][0] == key)
1284
1262
                    return entry_index, present
1286
1264
            pass
1287
1265
        entry_index = bisect.bisect_left(block, (key, []))
1288
1266
        present = (entry_index < len_block and
1289
 
                   block[entry_index][0] == key)
 
1267
            block[entry_index][0] == key)
1290
1268
        self._last_entry_index = entry_index
1291
1269
        return entry_index, present
1292
1270
 
1302
1280
            (it was locked by DirState.initialize)
1303
1281
        """
1304
1282
        result = DirState.initialize(dir_state_filename,
1305
 
                                     sha1_provider=sha1_provider)
 
1283
            sha1_provider=sha1_provider)
1306
1284
        try:
1307
 
            with contextlib.ExitStack() as exit_stack:
1308
 
                exit_stack.enter_context(tree.lock_read())
 
1285
            tree.lock_read()
 
1286
            try:
1309
1287
                parent_ids = tree.get_parent_ids()
1310
1288
                num_parents = len(parent_ids)
1311
1289
                parent_trees = []
1312
1290
                for parent_id in parent_ids:
1313
 
                    parent_tree = tree.branch.repository.revision_tree(
1314
 
                        parent_id)
 
1291
                    parent_tree = tree.branch.repository.revision_tree(parent_id)
1315
1292
                    parent_trees.append((parent_id, parent_tree))
1316
 
                    exit_stack.enter_context(parent_tree.lock_read())
 
1293
                    parent_tree.lock_read()
1317
1294
                result.set_parent_trees(parent_trees, [])
1318
1295
                result.set_state_from_inventory(tree.root_inventory)
 
1296
            finally:
 
1297
                for revid, parent_tree in parent_trees:
 
1298
                    parent_tree.unlock()
 
1299
                tree.unlock()
1319
1300
        except:
1320
1301
            # The caller won't have a chance to unlock this, so make sure we
1321
1302
            # cleanup ourselves
1324
1305
        return result
1325
1306
 
1326
1307
    def _check_delta_is_valid(self, delta):
1327
 
        delta = list(inventory._check_delta_unique_ids(
1328
 
                     inventory._check_delta_unique_old_paths(
1329
 
                         inventory._check_delta_unique_new_paths(
1330
 
                             inventory._check_delta_ids_match_entry(
1331
 
                                 inventory._check_delta_ids_are_valid(
1332
 
                                     inventory._check_delta_new_path_entry_both_or_None(delta)))))))
1333
 
 
1334
 
        def delta_key(d):
1335
 
            (old_path, new_path, file_id, new_entry) = d
1336
 
            if old_path is None:
1337
 
                old_path = ''
1338
 
            if new_path is None:
1339
 
                new_path = ''
1340
 
            return (old_path, new_path, file_id, new_entry)
1341
 
        delta.sort(key=delta_key, reverse=True)
1342
 
        return delta
 
1308
        return list(inventory._check_delta_unique_ids(
 
1309
                    inventory._check_delta_unique_old_paths(
 
1310
                    inventory._check_delta_unique_new_paths(
 
1311
                    inventory._check_delta_ids_match_entry(
 
1312
                    inventory._check_delta_ids_are_valid(
 
1313
                    inventory._check_delta_new_path_entry_both_or_None(delta)))))))
1343
1314
 
1344
1315
    def update_by_delta(self, delta):
1345
1316
        """Apply an inventory delta to the dirstate for tree 0
1364
1335
        new_ids = set()
1365
1336
        # This loop transforms the delta to single atomic operations that can
1366
1337
        # be executed and validated.
1367
 
        delta = self._check_delta_is_valid(delta)
 
1338
        delta = sorted(self._check_delta_is_valid(delta), reverse=True)
1368
1339
        for old_path, new_path, file_id, inv_entry in delta:
1369
 
            if not isinstance(file_id, bytes):
1370
 
                raise AssertionError(
1371
 
                    "must be a utf8 file_id not %s" % (type(file_id), ))
1372
1340
            if (file_id in insertions) or (file_id in removals):
1373
1341
                self._raise_invalid(old_path or new_path, file_id,
1374
 
                                    "repeated file_id")
 
1342
                    "repeated file_id")
1375
1343
            if old_path is not None:
1376
1344
                old_path = old_path.encode('utf-8')
1377
1345
                removals[file_id] = old_path
1380
1348
            if new_path is not None:
1381
1349
                if inv_entry is None:
1382
1350
                    self._raise_invalid(new_path, file_id,
1383
 
                                        "new_path with no entry")
 
1351
                        "new_path with no entry")
1384
1352
                new_path = new_path.encode('utf-8')
1385
1353
                dirname_utf8, basename = osutils.split(new_path)
1386
1354
                if basename:
1387
1355
                    parents.add((dirname_utf8, inv_entry.parent_id))
1388
1356
                key = (dirname_utf8, basename, file_id)
1389
1357
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1390
 
                if minikind == b't':
1391
 
                    fingerprint = inv_entry.reference_revision or b''
 
1358
                if minikind == 't':
 
1359
                    fingerprint = inv_entry.reference_revision or ''
1392
1360
                else:
1393
 
                    fingerprint = b''
 
1361
                    fingerprint = ''
1394
1362
                insertions[file_id] = (key, minikind, inv_entry.executable,
1395
1363
                                       fingerprint, new_path)
1396
1364
            # Transform moves into delete+add pairs
1415
1383
                                               fingerprint, new_child_path)
1416
1384
        self._check_delta_ids_absent(new_ids, delta, 0)
1417
1385
        try:
1418
 
            self._apply_removals(removals.items())
 
1386
            self._apply_removals(removals.iteritems())
1419
1387
            self._apply_insertions(insertions.values())
1420
1388
            # Validate parents
1421
1389
            self._after_delta_check_parents(parents, 0)
1422
 
        except errors.BzrError as e:
 
1390
        except errors.BzrError, e:
1423
1391
            self._changes_aborted = True
1424
1392
            if 'integrity error' not in str(e):
1425
1393
                raise
1426
1394
            # _get_entry raises BzrError when a request is inconsistent; we
1427
 
            # want such errors to be shown as InconsistentDelta - and that
 
1395
            # want such errors to be shown as InconsistentDelta - and that 
1428
1396
            # fits the behaviour we trigger.
1429
1397
            raise errors.InconsistentDeltaDelta(delta,
1430
 
                                                "error from _get_entry. %s" % (e,))
 
1398
                "error from _get_entry. %s" % (e,))
1431
1399
 
1432
1400
    def _apply_removals(self, removals):
1433
1401
        for file_id, path in sorted(removals, reverse=True,
1434
 
                                    key=operator.itemgetter(1)):
 
1402
            key=operator.itemgetter(1)):
1435
1403
            dirname, basename = osutils.split(path)
1436
1404
            block_i, entry_i, d_present, f_present = \
1437
1405
                self._get_block_entry_index(dirname, basename, 0)
1439
1407
                entry = self._dirblocks[block_i][1][entry_i]
1440
1408
            except IndexError:
1441
1409
                self._raise_invalid(path, file_id,
1442
 
                                    "Wrong path for old path.")
1443
 
            if not f_present or entry[1][0][0] in (b'a', b'r'):
 
1410
                    "Wrong path for old path.")
 
1411
            if not f_present or entry[1][0][0] in 'ar':
1444
1412
                self._raise_invalid(path, file_id,
1445
 
                                    "Wrong path for old path.")
 
1413
                    "Wrong path for old path.")
1446
1414
            if file_id != entry[0][2]:
1447
1415
                self._raise_invalid(path, file_id,
1448
 
                                    "Attempt to remove path has wrong id - found %r."
1449
 
                                    % entry[0][2])
 
1416
                    "Attempt to remove path has wrong id - found %r."
 
1417
                    % entry[0][2])
1450
1418
            self._make_absent(entry)
1451
1419
            # See if we have a malformed delta: deleting a directory must not
1452
1420
            # leave crud behind. This increases the number of bisects needed
1454
1422
            # is rare enough it shouldn't be an issue (famous last words?) RBC
1455
1423
            # 20080730.
1456
1424
            block_i, entry_i, d_present, f_present = \
1457
 
                self._get_block_entry_index(path, b'', 0)
 
1425
                self._get_block_entry_index(path, '', 0)
1458
1426
            if d_present:
1459
1427
                # The dir block is still present in the dirstate; this could
1460
1428
                # be due to it being in a parent tree, or a corrupt delta.
1461
1429
                for child_entry in self._dirblocks[block_i][1]:
1462
 
                    if child_entry[1][0][0] not in (b'r', b'a'):
 
1430
                    if child_entry[1][0][0] not in ('r', 'a'):
1463
1431
                        self._raise_invalid(path, entry[0][2],
1464
 
                                            "The file id was deleted but its children were "
1465
 
                                            "not deleted.")
 
1432
                            "The file id was deleted but its children were "
 
1433
                            "not deleted.")
1466
1434
 
1467
1435
    def _apply_insertions(self, adds):
1468
1436
        try:
1471
1439
                                    path_utf8=path_utf8)
1472
1440
        except errors.NotVersionedError:
1473
1441
            self._raise_invalid(path_utf8.decode('utf8'), key[2],
1474
 
                                "Missing parent")
 
1442
                "Missing parent")
1475
1443
 
1476
1444
    def update_basis_by_delta(self, delta, new_revid):
1477
1445
        """Update the parents of this tree after a commit.
1501
1469
 
1502
1470
        self._parents[0] = new_revid
1503
1471
 
1504
 
        delta = self._check_delta_is_valid(delta)
 
1472
        delta = sorted(self._check_delta_is_valid(delta), reverse=True)
1505
1473
        adds = []
1506
1474
        changes = []
1507
1475
        deletes = []
1527
1495
        # ids.
1528
1496
        new_ids = set()
1529
1497
        for old_path, new_path, file_id, inv_entry in delta:
1530
 
            if file_id.__class__ is not bytes:
1531
 
                raise AssertionError(
1532
 
                    "must be a utf8 file_id not %s" % (type(file_id), ))
1533
1498
            if inv_entry is not None and file_id != inv_entry.file_id:
1534
1499
                self._raise_invalid(new_path, file_id,
1535
 
                                    "mismatched entry file_id %r" % inv_entry)
 
1500
                    "mismatched entry file_id %r" % inv_entry)
1536
1501
            if new_path is None:
1537
1502
                new_path_utf8 = None
1538
1503
            else:
1539
1504
                if inv_entry is None:
1540
1505
                    self._raise_invalid(new_path, file_id,
1541
 
                                        "new_path with no entry")
 
1506
                        "new_path with no entry")
1542
1507
                new_path_utf8 = encode(new_path)
1543
1508
                # note the parent for validation
1544
1509
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1550
1515
                old_path_utf8 = encode(old_path)
1551
1516
            if old_path is None:
1552
1517
                adds.append((None, new_path_utf8, file_id,
1553
 
                             inv_to_entry(inv_entry), True))
 
1518
                    inv_to_entry(inv_entry), True))
1554
1519
                new_ids.add(file_id)
1555
1520
            elif new_path is None:
1556
1521
                deletes.append((old_path_utf8, None, file_id, None, True))
1574
1539
                # pair will result in the deleted item being reinserted, or
1575
1540
                # renamed items being reinserted twice - and possibly at the
1576
1541
                # wrong place. Splitting into a delete/add pair also simplifies
1577
 
                # the handling of entries with (b'f', ...), (b'r' ...) because
1578
 
                # the target of the b'r' is old_path here, and we add that to
 
1542
                # the handling of entries with ('f', ...), ('r' ...) because
 
1543
                # the target of the 'r' is old_path here, and we add that to
1579
1544
                # deletes, meaning that the add handler does not need to check
1580
 
                # for b'r' items on every pass.
 
1545
                # for 'r' items on every pass.
1581
1546
                self._update_basis_apply_deletes(deletes)
1582
1547
                deletes = []
1583
1548
                # Split into an add/delete pair recursively.
1592
1557
                for entry in new_deletes:
1593
1558
                    child_dirname, child_basename, child_file_id = entry[0]
1594
1559
                    if child_dirname:
1595
 
                        source_path = child_dirname + b'/' + child_basename
 
1560
                        source_path = child_dirname + '/' + child_basename
1596
1561
                    else:
1597
1562
                        source_path = child_basename
1598
1563
                    if new_path_utf8:
1599
1564
                        target_path = \
1600
1565
                            new_path_utf8 + source_path[len(old_path_utf8):]
1601
1566
                    else:
1602
 
                        if old_path_utf8 == b'':
 
1567
                        if old_path_utf8 == '':
1603
1568
                            raise AssertionError("cannot rename directory to"
1604
1569
                                                 " itself")
1605
1570
                        target_path = source_path[len(old_path_utf8) + 1:]
1606
 
                    adds.append(
1607
 
                        (None, target_path, entry[0][2], entry[1][1], False))
 
1571
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
1608
1572
                    deletes.append(
1609
1573
                        (source_path, target_path, entry[0][2], None, False))
1610
1574
                deletes.append(
1620
1584
            self._update_basis_apply_changes(changes)
1621
1585
            # Validate parents
1622
1586
            self._after_delta_check_parents(parents, 1)
1623
 
        except errors.BzrError as e:
 
1587
        except errors.BzrError, e:
1624
1588
            self._changes_aborted = True
1625
1589
            if 'integrity error' not in str(e):
1626
1590
                raise
1628
1592
            # want such errors to be shown as InconsistentDelta - and that
1629
1593
            # fits the behaviour we trigger.
1630
1594
            raise errors.InconsistentDeltaDelta(delta,
1631
 
                                                "error from _get_entry. %s" % (e,))
 
1595
                "error from _get_entry. %s" % (e,))
1632
1596
 
1633
1597
        self._mark_modified(header_modified=True)
1634
1598
        self._id_index = None
1650
1614
                if entry[0][2] != file_id:
1651
1615
                    # Different file_id, so not what we want.
1652
1616
                    continue
1653
 
                self._raise_invalid((b"%s/%s" % key[0:2]).decode('utf8'), file_id,
1654
 
                                    "This file_id is new in the delta but already present in "
1655
 
                                    "the target")
 
1617
                self._raise_invalid(("%s/%s" % key[0:2]).decode('utf8'), file_id,
 
1618
                    "This file_id is new in the delta but already present in "
 
1619
                    "the target")
1656
1620
 
1657
1621
    def _raise_invalid(self, path, file_id, reason):
1658
1622
        self._changes_aborted = True
1677
1641
        adds.sort(key=lambda x: x[1])
1678
1642
        # adds is now in lexographic order, which places all parents before
1679
1643
        # their children, so we can process it linearly.
 
1644
        absent = 'ar'
1680
1645
        st = static_tuple.StaticTuple
1681
1646
        for old_path, new_path, file_id, new_details, real_add in adds:
1682
1647
            dirname, basename = osutils.split(new_path)
1691
1656
                    self._get_block_entry_index(parent_dir, parent_base, 1)
1692
1657
                if not parent_present:
1693
1658
                    self._raise_invalid(new_path, file_id,
1694
 
                                        "Unable to find block for this record."
1695
 
                                        " Was the parent added?")
 
1659
                        "Unable to find block for this record."
 
1660
                        " Was the parent added?")
1696
1661
                self._ensure_block(parent_block_idx, parent_entry_idx, dirname)
1697
1662
 
1698
1663
            block = self._dirblocks[block_index][1]
1700
1665
            if real_add:
1701
1666
                if old_path is not None:
1702
1667
                    self._raise_invalid(new_path, file_id,
1703
 
                                        'considered a real add but still had old_path at %s'
1704
 
                                        % (old_path,))
 
1668
                        'considered a real add but still had old_path at %s'
 
1669
                        % (old_path,))
1705
1670
            if present:
1706
1671
                entry = block[entry_index]
1707
1672
                basis_kind = entry[1][1][0]
1708
 
                if basis_kind == b'a':
 
1673
                if basis_kind == 'a':
1709
1674
                    entry[1][1] = new_details
1710
 
                elif basis_kind == b'r':
 
1675
                elif basis_kind == 'r':
1711
1676
                    raise NotImplementedError()
1712
1677
                else:
1713
1678
                    self._raise_invalid(new_path, file_id,
1714
 
                                        "An entry was marked as a new add"
1715
 
                                        " but the basis target already existed")
 
1679
                        "An entry was marked as a new add"
 
1680
                        " but the basis target already existed")
1716
1681
            else:
1717
1682
                # The exact key was not found in the block. However, we need to
1718
1683
                # check if there is a key next to us that would have matched.
1719
1684
                # We only need to check 2 locations, because there are only 2
1720
1685
                # trees present.
1721
 
                for maybe_index in range(entry_index - 1, entry_index + 1):
 
1686
                for maybe_index in range(entry_index-1, entry_index+1):
1722
1687
                    if maybe_index < 0 or maybe_index >= len(block):
1723
1688
                        continue
1724
1689
                    maybe_entry = block[maybe_index]
1731
1696
                            ' but walking the data did, for %s'
1732
1697
                            % (entry_key,))
1733
1698
                    basis_kind = maybe_entry[1][1][0]
1734
 
                    if basis_kind not in (b'a', b'r'):
 
1699
                    if basis_kind not in 'ar':
1735
1700
                        self._raise_invalid(new_path, file_id,
1736
 
                                            "we have an add record for path, but the path"
1737
 
                                            " is already present with another file_id %s"
1738
 
                                            % (maybe_entry[0][2],))
 
1701
                            "we have an add record for path, but the path"
 
1702
                            " is already present with another file_id %s"
 
1703
                            % (maybe_entry[0][2],))
1739
1704
 
1740
1705
                entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
1741
1706
                                     new_details])
1742
1707
                block.insert(entry_index, entry)
1743
1708
 
1744
1709
            active_kind = entry[1][0][0]
1745
 
            if active_kind == b'a':
 
1710
            if active_kind == 'a':
1746
1711
                # The active record shows up as absent, this could be genuine,
1747
1712
                # or it could be present at some other location. We need to
1748
1713
                # verify.
1763
1728
                        # link it.
1764
1729
                        continue
1765
1730
                    real_active_kind = active_entry[1][0][0]
1766
 
                    if real_active_kind in (b'a', b'r'):
 
1731
                    if real_active_kind in 'ar':
1767
1732
                        # We found a record, which was not *this* record,
1768
1733
                        # which matches the file_id, but is not actually
1769
1734
                        # present. Something seems *really* wrong.
1770
1735
                        self._raise_invalid(new_path, file_id,
1771
 
                                            "We found a tree0 entry that doesnt make sense")
 
1736
                            "We found a tree0 entry that doesnt make sense")
1772
1737
                    # Now, we've found a tree0 entry which matches the file_id
1773
1738
                    # but is at a different location. So update them to be
1774
1739
                    # rename records.
1775
1740
                    active_dir, active_name = active_entry[0][:2]
1776
1741
                    if active_dir:
1777
 
                        active_path = active_dir + b'/' + active_name
 
1742
                        active_path = active_dir + '/' + active_name
1778
1743
                    else:
1779
1744
                        active_path = active_name
1780
 
                    active_entry[1][1] = st(b'r', new_path, 0, False, b'')
1781
 
                    entry[1][0] = st(b'r', active_path, 0, False, b'')
1782
 
            elif active_kind == b'r':
 
1745
                    active_entry[1][1] = st('r', new_path, 0, False, '')
 
1746
                    entry[1][0] = st('r', active_path, 0, False, '')
 
1747
            elif active_kind == 'r':
1783
1748
                raise NotImplementedError()
1784
1749
 
1785
1750
            new_kind = new_details[0]
1786
 
            if new_kind == b'd':
 
1751
            if new_kind == 'd':
1787
1752
                self._ensure_block(block_index, entry_index, new_path)
1788
1753
 
1789
1754
    def _update_basis_apply_changes(self, changes):
1792
1757
        :param adds: A sequence of changes. Each change is a tuple:
1793
1758
            (path_utf8, path_utf8, file_id, (entry_details))
1794
1759
        """
 
1760
        absent = 'ar'
1795
1761
        for old_path, new_path, file_id, new_details in changes:
1796
1762
            # the entry for this file_id must be in tree 0.
1797
1763
            entry = self._get_entry(1, file_id, new_path)
1798
 
            if entry[0] is None or entry[1][1][0] in (b'a', b'r'):
 
1764
            if entry[0] is None or entry[1][1][0] in 'ar':
1799
1765
                self._raise_invalid(new_path, file_id,
1800
 
                                    'changed entry considered not present')
 
1766
                    'changed entry considered not present')
1801
1767
            entry[1][1] = new_details
1802
1768
 
1803
1769
    def _update_basis_apply_deletes(self, deletes):
1822
1788
                self._get_block_entry_index(dirname, basename, 1)
1823
1789
            if not file_present:
1824
1790
                self._raise_invalid(old_path, file_id,
1825
 
                                    'basis tree does not contain removed entry')
 
1791
                    'basis tree does not contain removed entry')
1826
1792
            entry = self._dirblocks[block_index][1][entry_index]
1827
1793
            # The state of the entry in the 'active' WT
1828
1794
            active_kind = entry[1][0][0]
1829
1795
            if entry[0][2] != file_id:
1830
1796
                self._raise_invalid(old_path, file_id,
1831
 
                                    'mismatched file_id in tree 1')
 
1797
                    'mismatched file_id in tree 1')
1832
1798
            dir_block = ()
1833
1799
            old_kind = entry[1][1][0]
1834
 
            if active_kind in b'ar':
 
1800
            if active_kind in 'ar':
1835
1801
                # The active tree doesn't have this file_id.
1836
1802
                # The basis tree is changing this record. If this is a
1837
1803
                # rename, then we don't want the record here at all
1838
1804
                # anymore. If it is just an in-place change, we want the
1839
1805
                # record here, but we'll add it if we need to. So we just
1840
1806
                # delete it
1841
 
                if active_kind == b'r':
 
1807
                if active_kind == 'r':
1842
1808
                    active_path = entry[1][0][1]
1843
1809
                    active_entry = self._get_entry(0, file_id, active_path)
1844
 
                    if active_entry[1][1][0] != b'r':
1845
 
                        self._raise_invalid(old_path, file_id,
1846
 
                                            "Dirstate did not have matching rename entries")
1847
 
                    elif active_entry[1][0][0] in b'ar':
1848
 
                        self._raise_invalid(old_path, file_id,
1849
 
                                            "Dirstate had a rename pointing at an inactive"
1850
 
                                            " tree0")
 
1810
                    if active_entry[1][1][0] != 'r':
 
1811
                        self._raise_invalid(old_path, file_id,
 
1812
                            "Dirstate did not have matching rename entries")
 
1813
                    elif active_entry[1][0][0] in 'ar':
 
1814
                        self._raise_invalid(old_path, file_id,
 
1815
                            "Dirstate had a rename pointing at an inactive"
 
1816
                            " tree0")
1851
1817
                    active_entry[1][1] = null
1852
1818
                del self._dirblocks[block_index][1][entry_index]
1853
 
                if old_kind == b'd':
 
1819
                if old_kind == 'd':
1854
1820
                    # This was a directory, and the active tree says it
1855
1821
                    # doesn't exist, and now the basis tree says it doesn't
1856
1822
                    # exist. Remove its dirblock if present
1857
1823
                    (dir_block_index,
1858
1824
                     present) = self._find_block_index_from_key(
1859
 
                        (old_path, b'', b''))
 
1825
                        (old_path, '', ''))
1860
1826
                    if present:
1861
1827
                        dir_block = self._dirblocks[dir_block_index][1]
1862
1828
                        if not dir_block:
1867
1833
                # removed.
1868
1834
                entry[1][1] = null
1869
1835
                block_i, entry_i, d_present, f_present = \
1870
 
                    self._get_block_entry_index(old_path, b'', 1)
 
1836
                    self._get_block_entry_index(old_path, '', 1)
1871
1837
                if d_present:
1872
1838
                    dir_block = self._dirblocks[block_i][1]
1873
1839
            for child_entry in dir_block:
1874
1840
                child_basis_kind = child_entry[1][1][0]
1875
 
                if child_basis_kind not in b'ar':
 
1841
                if child_basis_kind not in 'ar':
1876
1842
                    self._raise_invalid(old_path, file_id,
1877
 
                                        "The file id was deleted but its children were "
1878
 
                                        "not deleted.")
 
1843
                        "The file id was deleted but its children were "
 
1844
                        "not deleted.")
1879
1845
 
1880
1846
    def _after_delta_check_parents(self, parents, index):
1881
1847
        """Check that parents required by the delta are all intact.
1882
 
 
 
1848
        
1883
1849
        :param parents: An iterable of (path_utf8, file_id) tuples which are
1884
1850
            required to be present in tree 'index' at path_utf8 with id file_id
1885
1851
            and be a directory.
1891
1857
            entry = self._get_entry(index, file_id, dirname_utf8)
1892
1858
            if entry[1] is None:
1893
1859
                self._raise_invalid(dirname_utf8.decode('utf8'),
1894
 
                                    file_id, "This parent is not present.")
 
1860
                    file_id, "This parent is not present.")
1895
1861
            # Parents of things must be directories
1896
 
            if entry[1][index][0] != b'd':
 
1862
            if entry[1][index][0] != 'd':
1897
1863
                self._raise_invalid(dirname_utf8.decode('utf8'),
1898
 
                                    file_id, "This parent is not a directory.")
 
1864
                    file_id, "This parent is not a directory.")
1899
1865
 
1900
1866
    def _observed_sha1(self, entry, sha1, stat_value,
1901
 
                       _stat_to_minikind=_stat_to_minikind):
 
1867
        _stat_to_minikind=_stat_to_minikind):
1902
1868
        """Note the sha1 of a file.
1903
1869
 
1904
1870
        :param entry: The entry the sha1 is for.
1906
1872
        :param stat_value: The os.lstat for the file.
1907
1873
        """
1908
1874
        try:
1909
 
            minikind = _stat_to_minikind[stat_value.st_mode & 0o170000]
 
1875
            minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
1910
1876
        except KeyError:
1911
1877
            # Unhandled kind
1912
1878
            return None
1913
 
        if minikind == b'f':
 
1879
        if minikind == 'f':
1914
1880
            if self._cutoff_time is None:
1915
1881
                self._sha_cutoff_time()
1916
1882
            if (stat_value.st_mtime < self._cutoff_time
1917
 
                    and stat_value.st_ctime < self._cutoff_time):
1918
 
                entry[1][0] = (b'f', sha1, stat_value.st_size, entry[1][0][3],
 
1883
                and stat_value.st_ctime < self._cutoff_time):
 
1884
                entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
1919
1885
                               pack_stat(stat_value))
1920
1886
                self._mark_modified([entry])
1921
1887
 
1944
1910
 
1945
1911
    def _is_executable(self, mode, old_executable):
1946
1912
        """Is this file executable?"""
1947
 
        if self._use_filesystem_for_exec:
1948
 
            return bool(S_IEXEC & mode)
1949
 
        else:
1950
 
            return old_executable
 
1913
        return bool(S_IEXEC & mode)
 
1914
 
 
1915
    def _is_executable_win32(self, mode, old_executable):
 
1916
        """On win32 the executable bit is stored in the dirstate."""
 
1917
        return old_executable
 
1918
 
 
1919
    if sys.platform == 'win32':
 
1920
        _is_executable = _is_executable_win32
1951
1921
 
1952
1922
    def _read_link(self, abspath, old_link):
1953
1923
        """Read the target of a symlink"""
1956
1926
        #       higher level, because there either won't be anything on disk,
1957
1927
        #       or the thing on disk will be a file.
1958
1928
        fs_encoding = osutils._fs_enc
1959
 
        if isinstance(abspath, str):
 
1929
        if isinstance(abspath, unicode):
1960
1930
            # abspath is defined as the path to pass to lstat. readlink is
1961
1931
            # buggy in python < 2.6 (it doesn't encode unicode path into FS
1962
1932
            # encoding), so we need to encode ourselves knowing that unicode
1976
1946
    def get_lines(self):
1977
1947
        """Serialise the entire dirstate to a sequence of lines."""
1978
1948
        if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
1979
 
                self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
 
1949
            self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
1980
1950
            # read what's on disk.
1981
1951
            self._state_file.seek(0)
1982
1952
            return self._state_file.readlines()
1983
1953
        lines = []
1984
1954
        lines.append(self._get_parents_line(self.get_parent_ids()))
1985
1955
        lines.append(self._get_ghosts_line(self._ghosts))
1986
 
        lines.extend(self._iter_entry_lines())
 
1956
        lines.extend(self._get_entry_lines())
1987
1957
        return self._get_output_lines(lines)
1988
1958
 
1989
1959
    def _get_ghosts_line(self, ghost_ids):
1990
1960
        """Create a line for the state file for ghost information."""
1991
 
        return b'\0'.join([b'%d' % len(ghost_ids)] + ghost_ids)
 
1961
        return '\0'.join([str(len(ghost_ids))] + ghost_ids)
1992
1962
 
1993
1963
    def _get_parents_line(self, parent_ids):
1994
1964
        """Create a line for the state file for parents information."""
1995
 
        return b'\0'.join([b'%d' % len(parent_ids)] + parent_ids)
 
1965
        return '\0'.join([str(len(parent_ids))] + parent_ids)
1996
1966
 
1997
 
    def _iter_entry_lines(self):
 
1967
    def _get_entry_lines(self):
1998
1968
        """Create lines for entries."""
1999
1969
        return map(self._entry_to_line, self._iter_entries())
2000
1970
 
2012
1982
            def fields_to_entry_0_parents(fields, _int=int):
2013
1983
                path_name_file_id_key = (fields[0], fields[1], fields[2])
2014
1984
                return (path_name_file_id_key, [
2015
 
                    (  # Current tree
 
1985
                    ( # Current tree
2016
1986
                        fields[3],                # minikind
2017
1987
                        fields[4],                # fingerprint
2018
1988
                        _int(fields[5]),          # size
2019
 
                        fields[6] == b'y',         # executable
 
1989
                        fields[6] == 'y',         # executable
2020
1990
                        fields[7],                # packed_stat or revision_id
2021
1991
                    )])
2022
1992
            return fields_to_entry_0_parents
2024
1994
            def fields_to_entry_1_parent(fields, _int=int):
2025
1995
                path_name_file_id_key = (fields[0], fields[1], fields[2])
2026
1996
                return (path_name_file_id_key, [
2027
 
                    (  # Current tree
 
1997
                    ( # Current tree
2028
1998
                        fields[3],                # minikind
2029
1999
                        fields[4],                # fingerprint
2030
2000
                        _int(fields[5]),          # size
2031
 
                        fields[6] == b'y',         # executable
 
2001
                        fields[6] == 'y',         # executable
2032
2002
                        fields[7],                # packed_stat or revision_id
2033
2003
                    ),
2034
 
                    (  # Parent 1
 
2004
                    ( # Parent 1
2035
2005
                        fields[8],                # minikind
2036
2006
                        fields[9],                # fingerprint
2037
2007
                        _int(fields[10]),         # size
2038
 
                        fields[11] == b'y',        # executable
 
2008
                        fields[11] == 'y',        # executable
2039
2009
                        fields[12],               # packed_stat or revision_id
2040
2010
                    ),
2041
2011
                    ])
2044
2014
            def fields_to_entry_2_parents(fields, _int=int):
2045
2015
                path_name_file_id_key = (fields[0], fields[1], fields[2])
2046
2016
                return (path_name_file_id_key, [
2047
 
                    (  # Current tree
 
2017
                    ( # Current tree
2048
2018
                        fields[3],                # minikind
2049
2019
                        fields[4],                # fingerprint
2050
2020
                        _int(fields[5]),          # size
2051
 
                        fields[6] == b'y',         # executable
 
2021
                        fields[6] == 'y',         # executable
2052
2022
                        fields[7],                # packed_stat or revision_id
2053
2023
                    ),
2054
 
                    (  # Parent 1
 
2024
                    ( # Parent 1
2055
2025
                        fields[8],                # minikind
2056
2026
                        fields[9],                # fingerprint
2057
2027
                        _int(fields[10]),         # size
2058
 
                        fields[11] == b'y',        # executable
 
2028
                        fields[11] == 'y',        # executable
2059
2029
                        fields[12],               # packed_stat or revision_id
2060
2030
                    ),
2061
 
                    (  # Parent 2
 
2031
                    ( # Parent 2
2062
2032
                        fields[13],               # minikind
2063
2033
                        fields[14],               # fingerprint
2064
2034
                        _int(fields[15]),         # size
2065
 
                        fields[16] == b'y',        # executable
 
2035
                        fields[16] == 'y',        # executable
2066
2036
                        fields[17],               # packed_stat or revision_id
2067
2037
                    ),
2068
2038
                    ])
2071
2041
            def fields_to_entry_n_parents(fields, _int=int):
2072
2042
                path_name_file_id_key = (fields[0], fields[1], fields[2])
2073
2043
                trees = [(fields[cur],                # minikind
2074
 
                          fields[cur + 1],              # fingerprint
2075
 
                          _int(fields[cur + 2]),        # size
2076
 
                          fields[cur + 3] == b'y',       # executable
2077
 
                          fields[cur + 4],              # stat or revision_id
2078
 
                          ) for cur in range(3, len(fields) - 1, 5)]
 
2044
                          fields[cur+1],              # fingerprint
 
2045
                          _int(fields[cur+2]),        # size
 
2046
                          fields[cur+3] == 'y',       # executable
 
2047
                          fields[cur+4],              # stat or revision_id
 
2048
                         ) for cur in xrange(3, len(fields)-1, 5)]
2079
2049
                return path_name_file_id_key, trees
2080
2050
            return fields_to_entry_n_parents
2081
2051
 
2105
2075
            tree present there.
2106
2076
        """
2107
2077
        self._read_dirblocks_if_needed()
2108
 
        key = dirname, basename, b''
 
2078
        key = dirname, basename, ''
2109
2079
        block_index, present = self._find_block_index_from_key(key)
2110
2080
        if not present:
2111
2081
            # no such directory - return the dir index and 0 for the row.
2112
2082
            return block_index, 0, False, False
2113
 
        block = self._dirblocks[block_index][1]  # access the entries only
 
2083
        block = self._dirblocks[block_index][1] # access the entries only
2114
2084
        entry_index, present = self._find_entry_index(key, block)
2115
2085
        # linear search through entries at this path to find the one
2116
2086
        # requested.
2117
2087
        while entry_index < len(block) and block[entry_index][0][1] == basename:
2118
 
            if block[entry_index][1][tree_index][0] not in (b'a', b'r'):
 
2088
            if block[entry_index][1][tree_index][0] not in 'ar':
2119
2089
                # neither absent or relocated
2120
2090
                return block_index, entry_index, True, True
2121
2091
            entry_index += 1
2143
2113
        """
2144
2114
        self._read_dirblocks_if_needed()
2145
2115
        if path_utf8 is not None:
2146
 
            if not isinstance(path_utf8, bytes):
2147
 
                raise errors.BzrError('path_utf8 is not bytes: %s %r'
2148
 
                                      % (type(path_utf8), path_utf8))
 
2116
            if type(path_utf8) is not str:
 
2117
                raise errors.BzrError('path_utf8 is not a str: %s %r'
 
2118
                    % (type(path_utf8), path_utf8))
2149
2119
            # path lookups are faster
2150
2120
            dirname, basename = osutils.split(path_utf8)
2151
2121
            block_index, entry_index, dir_present, file_present = \
2153
2123
            if not file_present:
2154
2124
                return None, None
2155
2125
            entry = self._dirblocks[block_index][1][entry_index]
2156
 
            if not (entry[0][2] and entry[1][tree_index][0] not in (b'a', b'r')):
 
2126
            if not (entry[0][2] and entry[1][tree_index][0] not in ('a', 'r')):
2157
2127
                raise AssertionError('unversioned entry?')
2158
2128
            if fileid_utf8:
2159
2129
                if entry[0][2] != fileid_utf8:
2181
2151
                    entry = self._dirblocks[block_index][1][entry_index]
2182
2152
                    # TODO: We might want to assert that entry[0][2] ==
2183
2153
                    #       fileid_utf8.
2184
 
                    # GZ 2017-06-09: Hoist set of minkinds somewhere
2185
 
                    if entry[1][tree_index][0] in {b'f', b'd', b'l', b't'}:
 
2154
                    if entry[1][tree_index][0] in 'fdlt':
2186
2155
                        # this is the result we are looking for: the
2187
2156
                        # real home of this file_id in this tree.
2188
2157
                        return entry
2189
 
                    if entry[1][tree_index][0] == b'a':
 
2158
                    if entry[1][tree_index][0] == 'a':
2190
2159
                        # there is no home for this entry in this tree
2191
2160
                        if include_deleted:
2192
2161
                            return entry
2193
2162
                        return None, None
2194
 
                    if entry[1][tree_index][0] != b'r':
 
2163
                    if entry[1][tree_index][0] != 'r':
2195
2164
                        raise AssertionError(
2196
 
                            "entry %r has invalid minikind %r for tree %r"
 
2165
                            "entry %r has invalid minikind %r for tree %r" \
2197
2166
                            % (entry,
2198
2167
                               entry[1][tree_index][0],
2199
2168
                               tree_index))
2200
2169
                    real_path = entry[1][tree_index][1]
2201
2170
                    return self._get_entry(tree_index, fileid_utf8=fileid_utf8,
2202
 
                                           path_utf8=real_path)
 
2171
                        path_utf8=real_path)
2203
2172
            return None, None
2204
2173
 
2205
2174
    @classmethod
2223
2192
            sha1_provider = DefaultSHA1Provider()
2224
2193
        result = cls(path, sha1_provider)
2225
2194
        # root dir and root dir contents with no children.
2226
 
        empty_tree_dirblocks = [(b'', []), (b'', [])]
 
2195
        empty_tree_dirblocks = [('', []), ('', [])]
2227
2196
        # a new root directory, with a NULLSTAT.
2228
2197
        empty_tree_dirblocks[0][1].append(
2229
 
            ((b'', b'', inventory.ROOT_ID), [
2230
 
                (b'd', b'', 0, False, DirState.NULLSTAT),
 
2198
            (('', '', inventory.ROOT_ID), [
 
2199
                ('d', '', 0, False, DirState.NULLSTAT),
2231
2200
            ]))
2232
2201
        result.lock_write()
2233
2202
        try:
2251
2220
        minikind = DirState._kind_to_minikind[kind]
2252
2221
        tree_data = inv_entry.revision
2253
2222
        if kind == 'directory':
2254
 
            fingerprint = b''
 
2223
            fingerprint = ''
2255
2224
            size = 0
2256
2225
            executable = False
2257
2226
        elif kind == 'symlink':
2258
2227
            if inv_entry.symlink_target is None:
2259
 
                fingerprint = b''
 
2228
                fingerprint = ''
2260
2229
            else:
2261
2230
                fingerprint = inv_entry.symlink_target.encode('utf8')
2262
2231
            size = 0
2263
2232
            executable = False
2264
2233
        elif kind == 'file':
2265
 
            fingerprint = inv_entry.text_sha1 or b''
 
2234
            fingerprint = inv_entry.text_sha1 or ''
2266
2235
            size = inv_entry.text_size or 0
2267
2236
            executable = inv_entry.executable
2268
2237
        elif kind == 'tree-reference':
2269
 
            fingerprint = inv_entry.reference_revision or b''
 
2238
            fingerprint = inv_entry.reference_revision or ''
2270
2239
            size = 0
2271
2240
            executable = False
2272
2241
        else:
2277
2246
    def _iter_child_entries(self, tree_index, path_utf8):
2278
2247
        """Iterate over all the entries that are children of path_utf.
2279
2248
 
2280
 
        This only returns entries that are present (not in b'a', b'r') in
 
2249
        This only returns entries that are present (not in 'a', 'r') in
2281
2250
        tree_index. tree_index data is not refreshed, so if tree 0 is used,
2282
2251
        results may differ from that obtained if paths were statted to
2283
2252
        determine what ones were directories.
2287
2256
        """
2288
2257
        pending_dirs = []
2289
2258
        next_pending_dirs = [path_utf8]
2290
 
        absent = (b'a', b'r')
 
2259
        absent = 'ar'
2291
2260
        while next_pending_dirs:
2292
2261
            pending_dirs = next_pending_dirs
2293
2262
            next_pending_dirs = []
2294
2263
            for path in pending_dirs:
2295
2264
                block_index, present = self._find_block_index_from_key(
2296
 
                    (path, b'', b''))
 
2265
                    (path, '', ''))
2297
2266
                if block_index == 0:
2298
2267
                    block_index = 1
2299
2268
                    if len(self._dirblocks) == 1:
2308
2277
                    kind = entry[1][tree_index][0]
2309
2278
                    if kind not in absent:
2310
2279
                        yield entry
2311
 
                    if kind == b'd':
 
2280
                    if kind == 'd':
2312
2281
                        if entry[0][0]:
2313
 
                            path = entry[0][0] + b'/' + entry[0][1]
 
2282
                            path = entry[0][0] + '/' + entry[0][1]
2314
2283
                        else:
2315
2284
                            path = entry[0][1]
2316
2285
                        next_pending_dirs.append(path)
2319
2288
        """Iterate over all the entries in the dirstate.
2320
2289
 
2321
2290
        Each yelt item is an entry in the standard format described in the
2322
 
        docstring of breezy.dirstate.
 
2291
        docstring of brzlib.dirstate.
2323
2292
        """
2324
2293
        self._read_dirblocks_if_needed()
2325
2294
        for directory in self._dirblocks:
2375
2344
            path lines.
2376
2345
        """
2377
2346
        output_lines = [DirState.HEADER_FORMAT_3]
2378
 
        lines.append(b'')  # a final newline
2379
 
        inventory_text = b'\0\n\0'.join(lines)
2380
 
        output_lines.append(b'crc32: %d\n' % (zlib.crc32(inventory_text),))
 
2347
        lines.append('') # a final newline
 
2348
        inventory_text = '\0\n\0'.join(lines)
 
2349
        output_lines.append('crc32: %s\n' % (zlib.crc32(inventory_text),))
2381
2350
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
2382
 
        num_entries = len(lines) - 3
2383
 
        output_lines.append(b'num_entries: %d\n' % (num_entries,))
 
2351
        num_entries = len(lines)-3
 
2352
        output_lines.append('num_entries: %s\n' % (num_entries,))
2384
2353
        output_lines.append(inventory_text)
2385
2354
        return output_lines
2386
2355
 
2387
2356
    def _make_deleted_row(self, fileid_utf8, parents):
2388
2357
        """Return a deleted row for fileid_utf8."""
2389
 
        return (b'/', b'RECYCLED.BIN', b'file', fileid_utf8, 0, DirState.NULLSTAT,
2390
 
                b''), parents
 
2358
        return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,
 
2359
            ''), parents
2391
2360
 
2392
2361
    def _num_present_parents(self):
2393
2362
        """The number of parent entries in each record row."""
2394
2363
        return len(self._parents) - len(self._ghosts)
2395
2364
 
2396
2365
    @classmethod
2397
 
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0,
2398
 
                use_filesystem_for_exec=True):
 
2366
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
2399
2367
        """Construct a DirState on the file at path "path".
2400
2368
 
2401
2369
        :param path: The path at which the dirstate file on disk should live.
2404
2372
        :param worth_saving_limit: when the exact number of hash changed
2405
2373
            entries is known, only bother saving the dirstate if more than
2406
2374
            this count of entries have changed. -1 means never save.
2407
 
        :param use_filesystem_for_exec: Whether to trust the filesystem
2408
 
            for executable bit information
2409
2375
        :return: An unlocked DirState object, associated with the given path.
2410
2376
        """
2411
2377
        if sha1_provider is None:
2412
2378
            sha1_provider = DefaultSHA1Provider()
2413
2379
        result = cls(path, sha1_provider,
2414
 
                     worth_saving_limit=worth_saving_limit,
2415
 
                     use_filesystem_for_exec=use_filesystem_for_exec)
 
2380
                     worth_saving_limit=worth_saving_limit)
2416
2381
        return result
2417
2382
 
2418
2383
    def _read_dirblocks_if_needed(self):
2436
2401
        """
2437
2402
        self._read_prelude()
2438
2403
        parent_line = self._state_file.readline()
2439
 
        info = parent_line.split(b'\0')
 
2404
        info = parent_line.split('\0')
2440
2405
        num_parents = int(info[0])
2441
2406
        self._parents = info[1:-1]
2442
2407
        ghost_line = self._state_file.readline()
2443
 
        info = ghost_line.split(b'\0')
 
2408
        info = ghost_line.split('\0')
2444
2409
        num_ghosts = int(info[1])
2445
2410
        self._ghosts = info[2:-1]
2446
2411
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
2468
2433
            raise errors.BzrError(
2469
2434
                'invalid header line: %r' % (header,))
2470
2435
        crc_line = self._state_file.readline()
2471
 
        if not crc_line.startswith(b'crc32: '):
 
2436
        if not crc_line.startswith('crc32: '):
2472
2437
            raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
2473
 
        self.crc_expected = int(crc_line[len(b'crc32: '):-1])
 
2438
        self.crc_expected = int(crc_line[len('crc32: '):-1])
2474
2439
        num_entries_line = self._state_file.readline()
2475
 
        if not num_entries_line.startswith(b'num_entries: '):
 
2440
        if not num_entries_line.startswith('num_entries: '):
2476
2441
            raise errors.BzrError('missing num_entries line')
2477
 
        self._num_entries = int(num_entries_line[len(b'num_entries: '):-1])
 
2442
        self._num_entries = int(num_entries_line[len('num_entries: '):-1])
2478
2443
 
2479
2444
    def sha1_from_stat(self, path, stat_result):
2480
2445
        """Find a sha1 given a stat lookup."""
2485
2450
        if self._packed_stat_index is None:
2486
2451
            index = {}
2487
2452
            for key, tree_details in self._iter_entries():
2488
 
                if tree_details[0][0] == b'f':
 
2453
                if tree_details[0][0] == 'f':
2489
2454
                    index[tree_details[0][4]] = tree_details[0][1]
2490
2455
            self._packed_stat_index = index
2491
2456
        return self._packed_stat_index
2508
2473
            # Should this be a warning? For now, I'm expecting that places that
2509
2474
            # mark it inconsistent will warn, making a warning here redundant.
2510
2475
            trace.mutter('Not saving DirState because '
2511
 
                         '_changes_aborted is set.')
 
2476
                    '_changes_aborted is set.')
2512
2477
            return
2513
2478
        # TODO: Since we now distinguish IN_MEMORY_MODIFIED from
2514
2479
        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
2543
2508
                self._state_file = self._lock_token.f
2544
2509
                # TODO: jam 20070315 We should validate the disk file has
2545
2510
                #       not changed contents. Since restore_read_lock may
2546
 
                #       not be an atomic operation.
 
2511
                #       not be an atomic operation.                
2547
2512
 
2548
2513
    def _maybe_fdatasync(self):
2549
2514
        """Flush to disk if possible and if not configured off."""
2553
2518
    def _worth_saving(self):
2554
2519
        """Is it worth saving the dirstate or not?"""
2555
2520
        if (self._header_state == DirState.IN_MEMORY_MODIFIED
2556
 
                or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
 
2521
            or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
2557
2522
            return True
2558
2523
        if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
2559
2524
            if self._worth_saving_limit == -1:
2591
2556
    def set_path_id(self, path, new_id):
2592
2557
        """Change the id of path to new_id in the current working tree.
2593
2558
 
2594
 
        :param path: The path inside the tree to set - b'' is the root, 'foo'
 
2559
        :param path: The path inside the tree to set - '' is the root, 'foo'
2595
2560
            is the path foo in the root.
2596
2561
        :param new_id: The new id to assign to the path. This must be a utf8
2597
2562
            file id (not unicode, and not None).
2605
2570
        if entry[0][2] == new_id:
2606
2571
            # Nothing to change.
2607
2572
            return
2608
 
        if new_id.__class__ != bytes:
2609
 
            raise AssertionError(
2610
 
                "must be a utf8 file_id not %s" % (type(new_id), ))
2611
2573
        # mark the old path absent, and insert a new root path
2612
2574
        self._make_absent(entry)
2613
 
        self.update_minimal((b'', b'', new_id), b'd',
2614
 
                            path_utf8=b'', packed_stat=entry[1][0][4])
 
2575
        self.update_minimal(('', '', new_id), 'd',
 
2576
            path_utf8='', packed_stat=entry[1][0][4])
2615
2577
        self._mark_modified()
2616
2578
 
2617
2579
    def set_parent_trees(self, trees, ghosts):
2668
2630
        # one: the current tree
2669
2631
        for entry in self._iter_entries():
2670
2632
            # skip entries not in the current tree
2671
 
            if entry[1][0][0] in (b'a', b'r'):  # absent, relocated
 
2633
            if entry[1][0][0] in 'ar': # absent, relocated
2672
2634
                continue
2673
2635
            by_path[entry[0]] = [entry[1][0]] + \
2674
2636
                [DirState.NULL_PARENT_DETAILS] * parent_count
2684
2646
            # any fileid in this tree as we set the by_path[id] to:
2685
2647
            # already_processed_tree_details + new_details + new_location_suffix
2686
2648
            # the suffix is from tree_index+1:parent_count+1.
2687
 
            new_location_suffix = [
2688
 
                DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
 
2649
            new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
2689
2650
            # now stitch in all the entries from this tree
2690
2651
            last_dirname = None
2691
2652
            for path, entry in tree.iter_entries_by_dir():
2719
2680
                        # other trees, so put absent pointers there
2720
2681
                        # This is the vertical axis in the matrix, all pointing
2721
2682
                        # to the real path.
2722
 
                        by_path[entry_key][tree_index] = st(b'r', path_utf8, 0,
2723
 
                                                            False, b'')
 
2683
                        by_path[entry_key][tree_index] = st('r', path_utf8, 0,
 
2684
                                                            False, '')
2724
2685
                # by path consistency: Insert into an existing path record
2725
2686
                # (trivial), or add a new one with relocation pointers for the
2726
2687
                # other tree indexes.
2734
2695
                    # mapping from path,id. We need to look up the correct path
2735
2696
                    # for the indexes from 0 to tree_index -1
2736
2697
                    new_details = []
2737
 
                    for lookup_index in range(tree_index):
 
2698
                    for lookup_index in xrange(tree_index):
2738
2699
                        # boundary case: this is the first occurrence of file_id
2739
2700
                        # so there are no id_indexes, possibly take this out of
2740
2701
                        # the loop?
2742
2703
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2743
2704
                        else:
2744
2705
                            # grab any one entry, use it to find the right path.
2745
 
                            a_key = next(iter(entry_keys))
2746
 
                            if by_path[a_key][lookup_index][0] in (b'r', b'a'):
 
2706
                            a_key = iter(entry_keys).next()
 
2707
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2747
2708
                                # it's a pointer or missing statement, use it as
2748
2709
                                # is.
2749
 
                                new_details.append(
2750
 
                                    by_path[a_key][lookup_index])
 
2710
                                new_details.append(by_path[a_key][lookup_index])
2751
2711
                            else:
2752
2712
                                # we have the right key, make a pointer to it.
2753
 
                                real_path = (b'/'.join(a_key[0:2])).strip(b'/')
2754
 
                                new_details.append(st(b'r', real_path, 0, False,
2755
 
                                                      b''))
 
2713
                                real_path = ('/'.join(a_key[0:2])).strip('/')
 
2714
                                new_details.append(st('r', real_path, 0, False,
 
2715
                                                      ''))
2756
2716
                    new_details.append(self._inv_entry_to_details(entry))
2757
2717
                    new_details.extend(new_location_suffix)
2758
2718
                    by_path[new_entry_key] = new_details
2779
2739
        # Saving time and objects. Also, use StaticTuple to avoid putting all
2780
2740
        # of these objects into Python's garbage collector.
2781
2741
        split_dirs = {}
2782
 
 
2783
2742
        def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple):
2784
2743
            # sort by: directory parts, file name, file id
2785
2744
            dirpath, fname, file_id = entry[0]
2786
2745
            try:
2787
2746
                split = _split_dirs[dirpath]
2788
2747
            except KeyError:
2789
 
                split = _st.from_sequence(dirpath.split(b'/'))
 
2748
                split = _st.from_sequence(dirpath.split('/'))
2790
2749
                _split_dirs[dirpath] = split
2791
2750
            return _st(split, fname, file_id)
2792
2751
        return sorted(entry_list, key=_key)
2801
2760
        """
2802
2761
        if 'evil' in debug.debug_flags:
2803
2762
            trace.mutter_callsite(1,
2804
 
                                  "set_state_from_inventory called; please mutate the tree instead")
 
2763
                "set_state_from_inventory called; please mutate the tree instead")
2805
2764
        tracing = 'dirstate' in debug.debug_flags
2806
2765
        if tracing:
2807
2766
            trace.mutter("set_state_from_inventory trace:")
2824
2783
        # underlying dirstate.
2825
2784
        old_iterator = iter(list(self._iter_entries()))
2826
2785
        # both must have roots so this is safe:
2827
 
        current_new = next(new_iterator)
2828
 
        current_old = next(old_iterator)
2829
 
 
 
2786
        current_new = new_iterator.next()
 
2787
        current_old = old_iterator.next()
2830
2788
        def advance(iterator):
2831
2789
            try:
2832
 
                return next(iterator)
 
2790
                return iterator.next()
2833
2791
            except StopIteration:
2834
2792
                return None
2835
2793
        while current_new or current_old:
2836
2794
            # skip entries in old that are not really there
2837
 
            if current_old and current_old[1][0][0] in (b'a', b'r'):
 
2795
            if current_old and current_old[1][0][0] in 'ar':
2838
2796
                # relocated or absent
2839
2797
                current_old = advance(old_iterator)
2840
2798
                continue
2846
2804
                new_entry_key = (new_dirname, new_basename, new_id)
2847
2805
                current_new_minikind = \
2848
2806
                    DirState._kind_to_minikind[current_new[1].kind]
2849
 
                if current_new_minikind == b't':
2850
 
                    fingerprint = current_new[1].reference_revision or b''
 
2807
                if current_new_minikind == 't':
 
2808
                    fingerprint = current_new[1].reference_revision or ''
2851
2809
                else:
2852
2810
                    # We normally only insert or remove records, or update
2853
2811
                    # them when it has significantly changed.  Then we want to
2854
2812
                    # erase its fingerprint.  Unaffected records should
2855
2813
                    # normally not be updated at all.
2856
 
                    fingerprint = b''
 
2814
                    fingerprint = ''
2857
2815
            else:
2858
2816
                # for safety disable variables
2859
2817
                new_path_utf8 = new_dirname = new_basename = new_id = \
2864
2822
                # old is finished: insert current_new into the state.
2865
2823
                if tracing:
2866
2824
                    trace.mutter("Appending from new '%s'.",
2867
 
                                 new_path_utf8.decode('utf8'))
 
2825
                        new_path_utf8.decode('utf8'))
2868
2826
                self.update_minimal(new_entry_key, current_new_minikind,
2869
 
                                    executable=current_new[1].executable,
2870
 
                                    path_utf8=new_path_utf8, fingerprint=fingerprint,
2871
 
                                    fullscan=True)
 
2827
                    executable=current_new[1].executable,
 
2828
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2829
                    fullscan=True)
2872
2830
                current_new = advance(new_iterator)
2873
2831
            elif not current_new:
2874
2832
                # new is finished
2875
2833
                if tracing:
2876
2834
                    trace.mutter("Truncating from old '%s/%s'.",
2877
 
                                 current_old[0][0].decode('utf8'),
2878
 
                                 current_old[0][1].decode('utf8'))
 
2835
                        current_old[0][0].decode('utf8'),
 
2836
                        current_old[0][1].decode('utf8'))
2879
2837
                self._make_absent(current_old)
2880
2838
                current_old = advance(old_iterator)
2881
2839
            elif new_entry_key == current_old[0]:
2887
2845
                # the minimal required trigger is if the execute bit or cached
2888
2846
                # kind has changed.
2889
2847
                if (current_old[1][0][3] != current_new[1].executable or
2890
 
                        current_old[1][0][0] != current_new_minikind):
 
2848
                    current_old[1][0][0] != current_new_minikind):
2891
2849
                    if tracing:
2892
2850
                        trace.mutter("Updating in-place change '%s'.",
2893
 
                                     new_path_utf8.decode('utf8'))
 
2851
                            new_path_utf8.decode('utf8'))
2894
2852
                    self.update_minimal(current_old[0], current_new_minikind,
2895
 
                                        executable=current_new[1].executable,
2896
 
                                        path_utf8=new_path_utf8, fingerprint=fingerprint,
2897
 
                                        fullscan=True)
 
2853
                        executable=current_new[1].executable,
 
2854
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2855
                        fullscan=True)
2898
2856
                # both sides are dealt with, move on
2899
2857
                current_old = advance(old_iterator)
2900
2858
                current_new = advance(new_iterator)
2901
 
            elif (lt_by_dirs(new_dirname, current_old[0][0])
2902
 
                  or (new_dirname == current_old[0][0] and
2903
 
                      new_entry_key[1:] < current_old[0][1:])):
 
2859
            elif (cmp_by_dirs(new_dirname, current_old[0][0]) < 0
 
2860
                  or (new_dirname == current_old[0][0]
 
2861
                      and new_entry_key[1:] < current_old[0][1:])):
2904
2862
                # new comes before:
2905
2863
                # add an entry for this and advance new
2906
2864
                if tracing:
2907
2865
                    trace.mutter("Inserting from new '%s'.",
2908
 
                                 new_path_utf8.decode('utf8'))
 
2866
                        new_path_utf8.decode('utf8'))
2909
2867
                self.update_minimal(new_entry_key, current_new_minikind,
2910
 
                                    executable=current_new[1].executable,
2911
 
                                    path_utf8=new_path_utf8, fingerprint=fingerprint,
2912
 
                                    fullscan=True)
 
2868
                    executable=current_new[1].executable,
 
2869
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2870
                    fullscan=True)
2913
2871
                current_new = advance(new_iterator)
2914
2872
            else:
2915
2873
                # we've advanced past the place where the old key would be,
2916
2874
                # without seeing it in the new list.  so it must be gone.
2917
2875
                if tracing:
2918
2876
                    trace.mutter("Deleting from old '%s/%s'.",
2919
 
                                 current_old[0][0].decode('utf8'),
2920
 
                                 current_old[0][1].decode('utf8'))
 
2877
                        current_old[0][0].decode('utf8'),
 
2878
                        current_old[0][1].decode('utf8'))
2921
2879
                self._make_absent(current_old)
2922
2880
                current_old = advance(old_iterator)
2923
2881
        self._mark_modified()
2936
2894
        self._requires_lock()
2937
2895
        # root dir and root dir contents with no children. We have to have a
2938
2896
        # root for set_state_from_inventory to work correctly.
2939
 
        empty_root = ((b'', b'', inventory.ROOT_ID),
2940
 
                      [(b'd', b'', 0, False, DirState.NULLSTAT)])
2941
 
        empty_tree_dirblocks = [(b'', [empty_root]), (b'', [])]
 
2897
        empty_root = (('', '', inventory.ROOT_ID),
 
2898
                      [('d', '', 0, False, DirState.NULLSTAT)])
 
2899
        empty_tree_dirblocks = [('', [empty_root]), ('', [])]
2942
2900
        self._set_data([], empty_tree_dirblocks)
2943
2901
        self.set_state_from_inventory(working_inv)
2944
2902
        self.set_parent_trees(parent_trees, parent_ghosts)
2955
2913
        all_remaining_keys = set()
2956
2914
        # Don't check the working tree, because it's going.
2957
2915
        for details in current_old[1][1:]:
2958
 
            if details[0] not in (b'a', b'r'):  # absent, relocated
 
2916
            if details[0] not in 'ar': # absent, relocated
2959
2917
                all_remaining_keys.add(current_old[0])
2960
 
            elif details[0] == b'r':  # relocated
 
2918
            elif details[0] == 'r': # relocated
2961
2919
                # record the key for the real path.
2962
 
                all_remaining_keys.add(
2963
 
                    tuple(osutils.split(details[1])) + (current_old[0][2],))
 
2920
                all_remaining_keys.add(tuple(osutils.split(details[1])) + (current_old[0][2],))
2964
2921
            # absent rows are not present at any path.
2965
2922
        last_reference = current_old[0] not in all_remaining_keys
2966
2923
        if last_reference:
2968
2925
            # absent), and relocated or absent entries for the other trees:
2969
2926
            # Remove it, it's meaningless.
2970
2927
            block = self._find_block(current_old[0])
2971
 
            entry_index, present = self._find_entry_index(
2972
 
                current_old[0], block[1])
 
2928
            entry_index, present = self._find_entry_index(current_old[0], block[1])
2973
2929
            if not present:
2974
 
                raise AssertionError(
2975
 
                    'could not find entry for %s' % (current_old,))
 
2930
                raise AssertionError('could not find entry for %s' % (current_old,))
2976
2931
            block[1].pop(entry_index)
2977
2932
            # if we have an id_index in use, remove this key from it for this id.
2978
2933
            if self._id_index is not None:
2985
2940
            update_block_index, present = \
2986
2941
                self._find_block_index_from_key(update_key)
2987
2942
            if not present:
2988
 
                raise AssertionError(
2989
 
                    'could not find block for %s' % (update_key,))
 
2943
                raise AssertionError('could not find block for %s' % (update_key,))
2990
2944
            update_entry_index, present = \
2991
 
                self._find_entry_index(
2992
 
                    update_key, self._dirblocks[update_block_index][1])
 
2945
                self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
2993
2946
            if not present:
2994
 
                raise AssertionError(
2995
 
                    'could not find entry for %s' % (update_key,))
 
2947
                raise AssertionError('could not find entry for %s' % (update_key,))
2996
2948
            update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
2997
2949
            # it must not be absent at the moment
2998
 
            if update_tree_details[0][0] == b'a':  # absent
 
2950
            if update_tree_details[0][0] == 'a': # absent
2999
2951
                raise AssertionError('bad row %r' % (update_tree_details,))
3000
2952
            update_tree_details[0] = DirState.NULL_PARENT_DETAILS
3001
2953
        self._mark_modified()
3002
2954
        return last_reference
3003
2955
 
3004
 
    def update_minimal(self, key, minikind, executable=False, fingerprint=b'',
3005
 
                       packed_stat=None, size=0, path_utf8=None, fullscan=False):
 
2956
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
 
2957
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
3006
2958
        """Update an entry to the state in tree 0.
3007
2959
 
3008
2960
        This will either create a new entry at 'key' or update an existing one.
3010
2962
        updated as well.
3011
2963
 
3012
2964
        :param key: (dir, name, file_id) for the new entry
3013
 
        :param minikind: The type for the entry (b'f' == 'file', b'd' ==
 
2965
        :param minikind: The type for the entry ('f' == 'file', 'd' ==
3014
2966
                'directory'), etc.
3015
2967
        :param executable: Should the executable bit be set?
3016
2968
        :param fingerprint: Simple fingerprint for new entry: canonical-form
3029
2981
        block = self._find_block(key)[1]
3030
2982
        if packed_stat is None:
3031
2983
            packed_stat = DirState.NULLSTAT
3032
 
        # XXX: Some callers pass b'' as the packed_stat, and it seems to be
 
2984
        # XXX: Some callers pass '' as the packed_stat, and it seems to be
3033
2985
        # sometimes present in the dirstate - this seems oddly inconsistent.
3034
2986
        # mbp 20071008
3035
2987
        entry_index, present = self._find_entry_index(key, block)
3038
2990
        if not present:
3039
2991
            # New record. Check there isn't an entry at this path already.
3040
2992
            if not fullscan:
3041
 
                low_index, _ = self._find_entry_index(key[0:2] + (b'',), block)
 
2993
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
3042
2994
                while low_index < len(block):
3043
2995
                    entry = block[low_index]
3044
2996
                    if entry[0][0:2] == key[0:2]:
3045
 
                        if entry[1][0][0] not in (b'a', b'r'):
 
2997
                        if entry[1][0][0] not in 'ar':
3046
2998
                            # This entry has the same path (but a different id) as
3047
2999
                            # the new entry we're adding, and is present in this
3048
3000
                            # tree.
3049
3001
                            self._raise_invalid(
3050
 
                                (b"%s/%s" % key[0:2]).decode('utf8'), key[2],
 
3002
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
3051
3003
                                "Attempt to add item at path already occupied by "
3052
3004
                                "id %r" % entry[0][2])
3053
3005
                        low_index += 1
3090
3042
                    # entry, if not already examined, is skipped over by that
3091
3043
                    # loop.
3092
3044
                    other_entry = other_block[other_entry_index]
3093
 
                    other_entry[1][0] = (b'r', path_utf8, 0, False, b'')
 
3045
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
3094
3046
                    if self._maybe_remove_row(other_block, other_entry_index,
3095
3047
                                              id_index):
3096
3048
                        # If the row holding this was removed, we need to
3106
3058
                    # TODO: This re-evaluates the existing_keys set, do we need
3107
3059
                    #       to do that ourselves?
3108
3060
                    other_key = list(existing_keys)[0]
3109
 
                for lookup_index in range(1, num_present_parents + 1):
 
3061
                for lookup_index in xrange(1, num_present_parents + 1):
3110
3062
                    # grab any one entry, use it to find the right path.
3111
3063
                    # TODO: optimise this to reduce memory use in highly
3112
3064
                    # fragmented situations by reusing the relocation
3114
3066
                    update_block_index, present = \
3115
3067
                        self._find_block_index_from_key(other_key)
3116
3068
                    if not present:
3117
 
                        raise AssertionError(
3118
 
                            'could not find block for %s' % (other_key,))
 
3069
                        raise AssertionError('could not find block for %s' % (other_key,))
3119
3070
                    update_entry_index, present = \
3120
 
                        self._find_entry_index(
3121
 
                            other_key, self._dirblocks[update_block_index][1])
 
3071
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
3122
3072
                    if not present:
3123
 
                        raise AssertionError(
3124
 
                            'update_minimal: could not find entry for %s' % (other_key,))
 
3073
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
3125
3074
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
3126
 
                    if update_details[0] in (b'a', b'r'):  # relocated, absent
 
3075
                    if update_details[0] in 'ar': # relocated, absent
3127
3076
                        # it's a pointer or absent in lookup_index's tree, use
3128
3077
                        # it as is.
3129
3078
                        new_entry[1].append(update_details)
3130
3079
                    else:
3131
3080
                        # we have the right key, make a pointer to it.
3132
3081
                        pointer_path = osutils.pathjoin(*other_key[0:2])
3133
 
                        new_entry[1].append(
3134
 
                            (b'r', pointer_path, 0, False, b''))
 
3082
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
3135
3083
            block.insert(entry_index, new_entry)
3136
3084
            self._add_to_id_index(id_index, key)
3137
3085
        else:
3151
3099
            existing_keys = id_index.get(key[2], ())
3152
3100
            if key not in existing_keys:
3153
3101
                raise AssertionError('We found the entry in the blocks, but'
3154
 
                                     ' the key is not in the id_index.'
3155
 
                                     ' key: %s, existing_keys: %s' % (key, existing_keys))
 
3102
                    ' the key is not in the id_index.'
 
3103
                    ' key: %s, existing_keys: %s' % (key, existing_keys))
3156
3104
            for entry_key in existing_keys:
3157
3105
                # TODO:PROFILING: It might be faster to just update
3158
3106
                # rather than checking if we need to, and then overwrite
3162
3110
                    # other trees, so put absent pointers there
3163
3111
                    # This is the vertical axis in the matrix, all pointing
3164
3112
                    # to the real path.
3165
 
                    block_index, present = self._find_block_index_from_key(
3166
 
                        entry_key)
 
3113
                    block_index, present = self._find_block_index_from_key(entry_key)
3167
3114
                    if not present:
3168
3115
                        raise AssertionError('not present: %r', entry_key)
3169
 
                    entry_index, present = self._find_entry_index(
3170
 
                        entry_key, self._dirblocks[block_index][1])
 
3116
                    entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
3171
3117
                    if not present:
3172
3118
                        raise AssertionError('not present: %r', entry_key)
3173
3119
                    self._dirblocks[block_index][1][entry_index][1][0] = \
3174
 
                        (b'r', path_utf8, 0, False, b'')
 
3120
                        ('r', path_utf8, 0, False, '')
3175
3121
        # add a containing dirblock if needed.
3176
 
        if new_details[0] == b'd':
3177
 
            # GZ 2017-06-09: Using pathjoin why?
3178
 
            subdir_key = (osutils.pathjoin(*key[0:2]), b'', b'')
 
3122
        if new_details[0] == 'd':
 
3123
            subdir_key = (osutils.pathjoin(*key[0:2]), '', '')
3179
3124
            block_index, present = self._find_block_index_from_key(subdir_key)
3180
3125
            if not present:
3181
3126
                self._dirblocks.insert(block_index, (subdir_key[0], []))
3184
3129
 
3185
3130
    def _maybe_remove_row(self, block, index, id_index):
3186
3131
        """Remove index if it is absent or relocated across the row.
3187
 
 
 
3132
        
3188
3133
        id_index is updated accordingly.
3189
3134
        :return: True if we removed the row, False otherwise
3190
3135
        """
3191
3136
        present_in_row = False
3192
3137
        entry = block[index]
3193
3138
        for column in entry[1]:
3194
 
            if column[0] not in (b'a', b'r'):
 
3139
            if column[0] not in 'ar':
3195
3140
                present_in_row = True
3196
3141
                break
3197
3142
        if not present_in_row:
3211
3156
        # NOTE: This must always raise AssertionError not just assert,
3212
3157
        # otherwise it may not behave properly under python -O
3213
3158
        #
3214
 
        # TODO: All entries must have some content that's not b'a' or b'r',
 
3159
        # TODO: All entries must have some content that's not 'a' or 'r',
3215
3160
        # otherwise it could just be removed.
3216
3161
        #
3217
3162
        # TODO: All relocations must point directly to a real entry.
3222
3167
        from pprint import pformat
3223
3168
        self._read_dirblocks_if_needed()
3224
3169
        if len(self._dirblocks) > 0:
3225
 
            if not self._dirblocks[0][0] == b'':
 
3170
            if not self._dirblocks[0][0] == '':
3226
3171
                raise AssertionError(
3227
 
                    "dirblocks don't start with root block:\n"
3228
 
                    + pformat(self._dirblocks))
 
3172
                    "dirblocks don't start with root block:\n" + \
 
3173
                    pformat(self._dirblocks))
3229
3174
        if len(self._dirblocks) > 1:
3230
 
            if not self._dirblocks[1][0] == b'':
 
3175
            if not self._dirblocks[1][0] == '':
3231
3176
                raise AssertionError(
3232
 
                    "dirblocks missing root directory:\n"
3233
 
                    + pformat(self._dirblocks))
 
3177
                    "dirblocks missing root directory:\n" + \
 
3178
                    pformat(self._dirblocks))
3234
3179
        # the dirblocks are sorted by their path components, name, and dir id
3235
 
        dir_names = [d[0].split(b'/')
3236
 
                     for d in self._dirblocks[1:]]
 
3180
        dir_names = [d[0].split('/')
 
3181
                for d in self._dirblocks[1:]]
3237
3182
        if dir_names != sorted(dir_names):
3238
3183
            raise AssertionError(
3239
 
                "dir names are not in sorted order:\n" +
3240
 
                pformat(self._dirblocks) +
 
3184
                "dir names are not in sorted order:\n" + \
 
3185
                pformat(self._dirblocks) + \
3241
3186
                "\nkeys:\n" +
3242
3187
                pformat(dir_names))
3243
3188
        for dirblock in self._dirblocks:
3251
3196
                        (entry, pformat(dirblock)))
3252
3197
            if dirblock[1] != sorted(dirblock[1]):
3253
3198
                raise AssertionError(
3254
 
                    "dirblock for %r is not sorted:\n%s" %
 
3199
                    "dirblock for %r is not sorted:\n%s" % \
3255
3200
                    (dirblock[0], pformat(dirblock)))
3256
3201
 
3257
3202
        def check_valid_parent():
3262
3207
            current tree. (It is invalid to have a non-absent file in an absent
3263
3208
            directory.)
3264
3209
            """
3265
 
            if entry[0][0:2] == (b'', b''):
 
3210
            if entry[0][0:2] == ('', ''):
3266
3211
                # There should be no parent for the root row
3267
3212
                return
3268
3213
            parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])
3270
3215
                raise AssertionError(
3271
3216
                    "no parent entry for: %s in tree %s"
3272
3217
                    % (this_path, tree_index))
3273
 
            if parent_entry[1][tree_index][0] != b'd':
 
3218
            if parent_entry[1][tree_index][0] != 'd':
3274
3219
                raise AssertionError(
3275
3220
                    "Parent entry for %s is not marked as a valid"
3276
3221
                    " directory. %s" % (this_path, parent_entry,))
3284
3229
        # We check this with a dict per tree pointing either to the present
3285
3230
        # name, or None if absent.
3286
3231
        tree_count = self._num_present_parents() + 1
3287
 
        id_path_maps = [{} for _ in range(tree_count)]
 
3232
        id_path_maps = [dict() for i in range(tree_count)]
3288
3233
        # Make sure that all renamed entries point to the correct location.
3289
3234
        for entry in self._iter_entries():
3290
3235
            file_id = entry[0][2]
3291
3236
            this_path = osutils.pathjoin(entry[0][0], entry[0][1])
3292
3237
            if len(entry[1]) != tree_count:
3293
3238
                raise AssertionError(
3294
 
                    "wrong number of entry details for row\n%s"
3295
 
                    ",\nexpected %d" %
3296
 
                    (pformat(entry), tree_count))
 
3239
                "wrong number of entry details for row\n%s" \
 
3240
                ",\nexpected %d" % \
 
3241
                (pformat(entry), tree_count))
3297
3242
            absent_positions = 0
3298
3243
            for tree_index, tree_state in enumerate(entry[1]):
3299
3244
                this_tree_map = id_path_maps[tree_index]
3300
3245
                minikind = tree_state[0]
3301
 
                if minikind in (b'a', b'r'):
 
3246
                if minikind in 'ar':
3302
3247
                    absent_positions += 1
3303
3248
                # have we seen this id before in this column?
3304
3249
                if file_id in this_tree_map:
3305
3250
                    previous_path, previous_loc = this_tree_map[file_id]
3306
3251
                    # any later mention of this file must be consistent with
3307
3252
                    # what was said before
3308
 
                    if minikind == b'a':
 
3253
                    if minikind == 'a':
3309
3254
                        if previous_path is not None:
3310
3255
                            raise AssertionError(
3311
 
                                "file %s is absent in row %r but also present "
3312
 
                                "at %r" %
3313
 
                                (file_id.decode('utf-8'), entry, previous_path))
3314
 
                    elif minikind == b'r':
 
3256
                            "file %s is absent in row %r but also present " \
 
3257
                            "at %r"% \
 
3258
                            (file_id, entry, previous_path))
 
3259
                    elif minikind == 'r':
3315
3260
                        target_location = tree_state[1]
3316
3261
                        if previous_path != target_location:
3317
3262
                            raise AssertionError(
3318
 
                                "file %s relocation in row %r but also at %r"
3319
 
                                % (file_id, entry, previous_path))
 
3263
                            "file %s relocation in row %r but also at %r" \
 
3264
                            % (file_id, entry, previous_path))
3320
3265
                    else:
3321
3266
                        # a file, directory, etc - may have been previously
3322
3267
                        # pointed to by a relocation, which must point here
3327
3272
                                (entry, previous_path, previous_loc))
3328
3273
                        check_valid_parent()
3329
3274
                else:
3330
 
                    if minikind == b'a':
 
3275
                    if minikind == 'a':
3331
3276
                        # absent; should not occur anywhere else
3332
3277
                        this_tree_map[file_id] = None, this_path
3333
 
                    elif minikind == b'r':
 
3278
                    elif minikind == 'r':
3334
3279
                        # relocation, must occur at expected location
3335
3280
                        this_tree_map[file_id] = tree_state[1], this_path
3336
3281
                    else:
3340
3285
                raise AssertionError(
3341
3286
                    "entry %r has no data for any tree." % (entry,))
3342
3287
        if self._id_index is not None:
3343
 
            for file_id, entry_keys in self._id_index.items():
 
3288
            for file_id, entry_keys in self._id_index.iteritems():
3344
3289
                for entry_key in entry_keys:
3345
3290
                    # Check that the entry in the map is pointing to the same
3346
3291
                    # file_id
3350
3295
                            % (file_id, entry_key))
3351
3296
                    # And that from this entry key, we can look up the original
3352
3297
                    # record
3353
 
                    block_index, present = self._find_block_index_from_key(
3354
 
                        entry_key)
3355
 
                    if not present:
3356
 
                        raise AssertionError(
3357
 
                            'missing block for entry key: %r', entry_key)
3358
 
                    entry_index, present = self._find_entry_index(
3359
 
                        entry_key, self._dirblocks[block_index][1])
3360
 
                    if not present:
3361
 
                        raise AssertionError(
3362
 
                            'missing entry for key: %r', entry_key)
 
3298
                    block_index, present = self._find_block_index_from_key(entry_key)
 
3299
                    if not present:
 
3300
                        raise AssertionError('missing block for entry key: %r', entry_key)
 
3301
                    entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
 
3302
                    if not present:
 
3303
                        raise AssertionError('missing entry for key: %r', entry_key)
3363
3304
                if len(entry_keys) != len(set(entry_keys)):
3364
3305
                    raise AssertionError(
3365
3306
                        'id_index contained non-unique data for %s'
3391
3332
        self._lock_state = 'r'
3392
3333
        self._state_file = self._lock_token.f
3393
3334
        self._wipe_state()
3394
 
        return lock.LogicalLockResult(self.unlock)
3395
3335
 
3396
3336
    def lock_write(self):
3397
3337
        """Acquire a write lock on the dirstate."""
3405
3345
        self._lock_state = 'w'
3406
3346
        self._state_file = self._lock_token.f
3407
3347
        self._wipe_state()
3408
 
        return lock.LogicalLockResult(self.unlock, self._lock_token)
3409
3348
 
3410
3349
    def unlock(self):
3411
3350
        """Drop any locks held on the dirstate."""
3428
3367
 
3429
3368
 
3430
3369
def py_update_entry(state, entry, abspath, stat_value,
3431
 
                    _stat_to_minikind=DirState._stat_to_minikind):
 
3370
                 _stat_to_minikind=DirState._stat_to_minikind):
3432
3371
    """Update the entry based on what is actually on disk.
3433
3372
 
3434
3373
    This function only calculates the sha if it needs to - if the entry is
3443
3382
        target of a symlink.
3444
3383
    """
3445
3384
    try:
3446
 
        minikind = _stat_to_minikind[stat_value.st_mode & 0o170000]
 
3385
        minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
3447
3386
    except KeyError:
3448
3387
        # Unhandled kind
3449
3388
        return None
3450
3389
    packed_stat = pack_stat(stat_value)
3451
3390
    (saved_minikind, saved_link_or_sha1, saved_file_size,
3452
3391
     saved_executable, saved_packed_stat) = entry[1][0]
3453
 
    if not isinstance(saved_minikind, bytes):
3454
 
        raise TypeError(saved_minikind)
3455
3392
 
3456
 
    if minikind == b'd' and saved_minikind == b't':
3457
 
        minikind = b't'
 
3393
    if minikind == 'd' and saved_minikind == 't':
 
3394
        minikind = 't'
3458
3395
    if (minikind == saved_minikind
3459
 
            and packed_stat == saved_packed_stat):
 
3396
        and packed_stat == saved_packed_stat):
3460
3397
        # The stat hasn't changed since we saved, so we can re-use the
3461
3398
        # saved sha hash.
3462
 
        if minikind == b'd':
 
3399
        if minikind == 'd':
3463
3400
            return None
3464
3401
 
3465
3402
        # size should also be in packed_stat
3470
3407
    # process this entry.
3471
3408
    link_or_sha1 = None
3472
3409
    worth_saving = True
3473
 
    if minikind == b'f':
 
3410
    if minikind == 'f':
3474
3411
        executable = state._is_executable(stat_value.st_mode,
3475
 
                                          saved_executable)
 
3412
                                         saved_executable)
3476
3413
        if state._cutoff_time is None:
3477
3414
            state._sha_cutoff_time()
3478
3415
        if (stat_value.st_mtime < state._cutoff_time
3479
3416
            and stat_value.st_ctime < state._cutoff_time
3480
3417
            and len(entry[1]) > 1
3481
 
                and entry[1][1][0] != b'a'):
 
3418
            and entry[1][1][0] != 'a'):
3482
3419
            # Could check for size changes for further optimised
3483
3420
            # avoidance of sha1's. However the most prominent case of
3484
3421
            # over-shaing is during initial add, which this catches.
3486
3423
            # are calculated at the same time, so checking just the size
3487
3424
            # gains nothing w.r.t. performance.
3488
3425
            link_or_sha1 = state._sha1_file(abspath)
3489
 
            entry[1][0] = (b'f', link_or_sha1, stat_value.st_size,
 
3426
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
3490
3427
                           executable, packed_stat)
3491
3428
        else:
3492
 
            entry[1][0] = (b'f', b'', stat_value.st_size,
 
3429
            entry[1][0] = ('f', '', stat_value.st_size,
3493
3430
                           executable, DirState.NULLSTAT)
3494
3431
            worth_saving = False
3495
 
    elif minikind == b'd':
 
3432
    elif minikind == 'd':
3496
3433
        link_or_sha1 = None
3497
 
        entry[1][0] = (b'd', b'', 0, False, packed_stat)
3498
 
        if saved_minikind != b'd':
 
3434
        entry[1][0] = ('d', '', 0, False, packed_stat)
 
3435
        if saved_minikind != 'd':
3499
3436
            # This changed from something into a directory. Make sure we
3500
3437
            # have a directory block for it. This doesn't happen very
3501
3438
            # often, so this doesn't have to be super fast.
3502
3439
            block_index, entry_index, dir_present, file_present = \
3503
3440
                state._get_block_entry_index(entry[0][0], entry[0][1], 0)
3504
3441
            state._ensure_block(block_index, entry_index,
3505
 
                                osutils.pathjoin(entry[0][0], entry[0][1]))
 
3442
                               osutils.pathjoin(entry[0][0], entry[0][1]))
3506
3443
        else:
3507
3444
            worth_saving = False
3508
 
    elif minikind == b'l':
3509
 
        if saved_minikind == b'l':
 
3445
    elif minikind == 'l':
 
3446
        if saved_minikind == 'l':
3510
3447
            worth_saving = False
3511
3448
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
3512
3449
        if state._cutoff_time is None:
3513
3450
            state._sha_cutoff_time()
3514
3451
        if (stat_value.st_mtime < state._cutoff_time
3515
 
                and stat_value.st_ctime < state._cutoff_time):
3516
 
            entry[1][0] = (b'l', link_or_sha1, stat_value.st_size,
 
3452
            and stat_value.st_ctime < state._cutoff_time):
 
3453
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
3517
3454
                           False, packed_stat)
3518
3455
        else:
3519
 
            entry[1][0] = (b'l', b'', stat_value.st_size,
 
3456
            entry[1][0] = ('l', '', stat_value.st_size,
3520
3457
                           False, DirState.NULLSTAT)
3521
3458
    if worth_saving:
3522
3459
        state._mark_modified([entry])
3526
3463
class ProcessEntryPython(object):
3527
3464
 
3528
3465
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
3529
 
                 "last_source_parent", "last_target_parent", "include_unchanged",
3530
 
                 "partial", "use_filesystem_for_exec", "utf8_decode",
3531
 
                 "searched_specific_files", "search_specific_files",
3532
 
                 "searched_exact_paths", "search_specific_file_parents", "seen_ids",
3533
 
                 "state", "source_index", "target_index", "want_unversioned", "tree"]
 
3466
        "last_source_parent", "last_target_parent", "include_unchanged",
 
3467
        "partial", "use_filesystem_for_exec", "utf8_decode",
 
3468
        "searched_specific_files", "search_specific_files",
 
3469
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
 
3470
        "state", "source_index", "target_index", "want_unversioned", "tree"]
3534
3471
 
3535
3472
    def __init__(self, include_unchanged, use_filesystem_for_exec,
3536
 
                 search_specific_files, state, source_index, target_index,
3537
 
                 want_unversioned, tree):
 
3473
        search_specific_files, state, source_index, target_index,
 
3474
        want_unversioned, tree):
3538
3475
        self.old_dirname_to_file_id = {}
3539
3476
        self.new_dirname_to_file_id = {}
3540
3477
        # Are we doing a partial iter_changes?
3541
 
        self.partial = search_specific_files != {''}
 
3478
        self.partial = search_specific_files != set([''])
3542
3479
        # Using a list so that we can access the values and change them in
3543
3480
        # nested scope. Each one is [path, file_id, entry]
3544
3481
        self.last_source_parent = [None, None]
3573
3510
        """Compare an entry and real disk to generate delta information.
3574
3511
 
3575
3512
        :param path_info: top_relpath, basename, kind, lstat, abspath for
3576
 
            the path of entry. If None, then the path is considered absent in
 
3513
            the path of entry. If None, then the path is considered absent in 
3577
3514
            the target (Perhaps we should pass in a concrete entry for this ?)
3578
3515
            Basename is returned as a utf8 string because we expect this
3579
3516
            tuple will be ignored, and don't want to take the time to
3590
3527
            source_details = DirState.NULL_PARENT_DETAILS
3591
3528
        else:
3592
3529
            source_details = entry[1][self.source_index]
3593
 
        # GZ 2017-06-09: Eck, more sets.
3594
 
        _fdltr = {b'f', b'd', b'l', b't', b'r'}
3595
 
        _fdlt = {b'f', b'd', b'l', b't'}
3596
 
        _ra = (b'r', b'a')
3597
3530
        target_details = entry[1][self.target_index]
3598
3531
        target_minikind = target_details[0]
3599
 
        if path_info is not None and target_minikind in _fdlt:
 
3532
        if path_info is not None and target_minikind in 'fdlt':
3600
3533
            if not (self.target_index == 0):
3601
3534
                raise AssertionError()
3602
3535
            link_or_sha1 = update_entry(self.state, entry,
3603
 
                                        abspath=path_info[4], stat_value=path_info[3])
 
3536
                abspath=path_info[4], stat_value=path_info[3])
3604
3537
            # The entry may have been modified by update_entry
3605
3538
            target_details = entry[1][self.target_index]
3606
3539
            target_minikind = target_details[0]
3608
3541
            link_or_sha1 = None
3609
3542
        file_id = entry[0][2]
3610
3543
        source_minikind = source_details[0]
3611
 
        if source_minikind in _fdltr and target_minikind in _fdlt:
 
3544
        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
3612
3545
            # claimed content in both: diff
3613
3546
            #   r    | fdlt   |      | add source to search, add id path move and perform
3614
3547
            #        |        |      | diff check on source-target
3615
3548
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
3616
3549
            #        |        |      | ???
3617
 
            if source_minikind == b'r':
 
3550
            if source_minikind in 'r':
3618
3551
                # add the source to the search path to find any children it
3619
3552
                # has.  TODO ? : only add if it is a container ?
3620
3553
                if not osutils.is_inside_any(self.searched_specific_files,
3626
3559
                old_dirname, old_basename = os.path.split(old_path)
3627
3560
                path = pathjoin(entry[0][0], entry[0][1])
3628
3561
                old_entry = self.state._get_entry(self.source_index,
3629
 
                                                  path_utf8=old_path)
 
3562
                                             path_utf8=old_path)
3630
3563
                # update the source details variable to be the real
3631
3564
                # location.
3632
3565
                if old_entry == (None, None):
3633
 
                    raise DirstateCorrupt(self.state._filename,
3634
 
                                          "entry '%s/%s' is considered renamed from %r"
3635
 
                                          " but source does not exist\n"
3636
 
                                          "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
 
3566
                    raise errors.CorruptDirstate(self.state._filename,
 
3567
                        "entry '%s/%s' is considered renamed from %r"
 
3568
                        " but source does not exist\n"
 
3569
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
3637
3570
                source_details = old_entry[1][self.source_index]
3638
3571
                source_minikind = source_details[0]
3639
3572
            else:
3652
3585
                    if path is None:
3653
3586
                        old_path = path = pathjoin(old_dirname, old_basename)
3654
3587
                    self.new_dirname_to_file_id[path] = file_id
3655
 
                    if source_minikind != b'd':
 
3588
                    if source_minikind != 'd':
3656
3589
                        content_change = True
3657
3590
                    else:
3658
3591
                        # directories have no fingerprint
3659
3592
                        content_change = False
3660
3593
                    target_exec = False
3661
3594
                elif target_kind == 'file':
3662
 
                    if source_minikind != b'f':
 
3595
                    if source_minikind != 'f':
3663
3596
                        content_change = True
3664
3597
                    else:
3665
3598
                        # Check the sha. We can't just rely on the size as
3669
3602
                            # Stat cache miss:
3670
3603
                            statvalue, link_or_sha1 = \
3671
3604
                                self.state._sha1_provider.stat_and_sha1(
3672
 
                                    path_info[4])
 
3605
                                path_info[4])
3673
3606
                            self.state._observed_sha1(entry, link_or_sha1,
3674
 
                                                      statvalue)
 
3607
                                statvalue)
3675
3608
                        content_change = (link_or_sha1 != source_details[1])
3676
3609
                    # Target details is updated at update_entry time
3677
3610
                    if self.use_filesystem_for_exec:
3681
3614
                    else:
3682
3615
                        target_exec = target_details[3]
3683
3616
                elif target_kind == 'symlink':
3684
 
                    if source_minikind != b'l':
 
3617
                    if source_minikind != 'l':
3685
3618
                        content_change = True
3686
3619
                    else:
3687
3620
                        content_change = (link_or_sha1 != source_details[1])
3688
3621
                    target_exec = False
3689
3622
                elif target_kind == 'tree-reference':
3690
 
                    if source_minikind != b't':
 
3623
                    if source_minikind != 't':
3691
3624
                        content_change = True
3692
3625
                    else:
3693
3626
                        content_change = False
3696
3629
                    if path is None:
3697
3630
                        path = pathjoin(old_dirname, old_basename)
3698
3631
                    raise errors.BadFileKindError(path, path_info[2])
3699
 
            if source_minikind == b'd':
 
3632
            if source_minikind == 'd':
3700
3633
                if path is None:
3701
3634
                    old_path = path = pathjoin(old_dirname, old_basename)
3702
3635
                self.old_dirname_to_file_id[old_path] = file_id
3708
3641
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
3709
3642
                except KeyError:
3710
3643
                    source_parent_entry = self.state._get_entry(self.source_index,
3711
 
                                                                path_utf8=old_dirname)
 
3644
                                                           path_utf8=old_dirname)
3712
3645
                    source_parent_id = source_parent_entry[0][2]
3713
3646
                if source_parent_id == entry[0][2]:
3714
3647
                    # This is the root, so the parent is None
3726
3659
                    # TODO: We don't always need to do the lookup, because the
3727
3660
                    #       parent entry will be the same as the source entry.
3728
3661
                    target_parent_entry = self.state._get_entry(self.target_index,
3729
 
                                                                path_utf8=new_dirname)
 
3662
                                                           path_utf8=new_dirname)
3730
3663
                    if target_parent_entry == (None, None):
3731
3664
                        raise AssertionError(
3732
3665
                            "Could not find target parent in wt: %s\nparent of: %s"
3741
3674
 
3742
3675
            source_exec = source_details[3]
3743
3676
            changed = (content_change
3744
 
                       or source_parent_id != target_parent_id
3745
 
                       or old_basename != entry[0][1]
3746
 
                       or source_exec != target_exec
3747
 
                       )
 
3677
                or source_parent_id != target_parent_id
 
3678
                or old_basename != entry[0][1]
 
3679
                or source_exec != target_exec
 
3680
                )
3748
3681
            if not changed and not self.include_unchanged:
3749
3682
                return None, False
3750
3683
            else:
3759
3692
                    else:
3760
3693
                        path_u = self.utf8_decode(path)[0]
3761
3694
                source_kind = DirState._minikind_to_kind[source_minikind]
3762
 
                return TreeChange(
3763
 
                    entry[0][2],
3764
 
                    (old_path_u, path_u),
3765
 
                    content_change,
3766
 
                    (True, True),
3767
 
                    (source_parent_id, target_parent_id),
3768
 
                    (self.utf8_decode(old_basename)[
3769
 
                     0], self.utf8_decode(entry[0][1])[0]),
3770
 
                    (source_kind, target_kind),
3771
 
                    (source_exec, target_exec)), changed
3772
 
        elif source_minikind in b'a' and target_minikind in _fdlt:
 
3695
                return (entry[0][2],
 
3696
                       (old_path_u, path_u),
 
3697
                       content_change,
 
3698
                       (True, True),
 
3699
                       (source_parent_id, target_parent_id),
 
3700
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
 
3701
                       (source_kind, target_kind),
 
3702
                       (source_exec, target_exec)), changed
 
3703
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3773
3704
            # looks like a new file
3774
3705
            path = pathjoin(entry[0][0], entry[0][1])
3775
3706
            # parent id is the entry for the path in the target tree
3776
3707
            # TODO: these are the same for an entire directory: cache em.
3777
3708
            parent_id = self.state._get_entry(self.target_index,
3778
 
                                              path_utf8=entry[0][0])[0][2]
 
3709
                                         path_utf8=entry[0][0])[0][2]
3779
3710
            if parent_id == entry[0][2]:
3780
3711
                parent_id = None
3781
3712
            if path_info is not None:
3788
3719
                        and stat.S_IEXEC & path_info[3].st_mode)
3789
3720
                else:
3790
3721
                    target_exec = target_details[3]
3791
 
                return TreeChange(
3792
 
                    entry[0][2],
3793
 
                    (None, self.utf8_decode(path)[0]),
3794
 
                    True,
3795
 
                    (False, True),
3796
 
                    (None, parent_id),
3797
 
                    (None, self.utf8_decode(entry[0][1])[0]),
3798
 
                    (None, path_info[2]),
3799
 
                    (None, target_exec)), True
 
3722
                return (entry[0][2],
 
3723
                       (None, self.utf8_decode(path)[0]),
 
3724
                       True,
 
3725
                       (False, True),
 
3726
                       (None, parent_id),
 
3727
                       (None, self.utf8_decode(entry[0][1])[0]),
 
3728
                       (None, path_info[2]),
 
3729
                       (None, target_exec)), True
3800
3730
            else:
3801
3731
                # Its a missing file, report it as such.
3802
 
                return TreeChange(
3803
 
                    entry[0][2],
3804
 
                    (None, self.utf8_decode(path)[0]),
3805
 
                    False,
3806
 
                    (False, True),
3807
 
                    (None, parent_id),
3808
 
                    (None, self.utf8_decode(entry[0][1])[0]),
3809
 
                    (None, None),
3810
 
                    (None, False)), True
3811
 
        elif source_minikind in _fdlt and target_minikind in b'a':
 
3732
                return (entry[0][2],
 
3733
                       (None, self.utf8_decode(path)[0]),
 
3734
                       False,
 
3735
                       (False, True),
 
3736
                       (None, parent_id),
 
3737
                       (None, self.utf8_decode(entry[0][1])[0]),
 
3738
                       (None, None),
 
3739
                       (None, False)), True
 
3740
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3812
3741
            # unversioned, possibly, or possibly not deleted: we dont care.
3813
3742
            # if its still on disk, *and* theres no other entry at this
3814
3743
            # path [we dont know this in this routine at the moment -
3815
3744
            # perhaps we should change this - then it would be an unknown.
3816
3745
            old_path = pathjoin(entry[0][0], entry[0][1])
3817
3746
            # parent id is the entry for the path in the target tree
3818
 
            parent_id = self.state._get_entry(
3819
 
                self.source_index, path_utf8=entry[0][0])[0][2]
 
3747
            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
3820
3748
            if parent_id == entry[0][2]:
3821
3749
                parent_id = None
3822
 
            return TreeChange(
3823
 
                entry[0][2],
3824
 
                (self.utf8_decode(old_path)[0], None),
3825
 
                True,
3826
 
                (True, False),
3827
 
                (parent_id, None),
3828
 
                (self.utf8_decode(entry[0][1])[0], None),
3829
 
                (DirState._minikind_to_kind[source_minikind], None),
3830
 
                (source_details[3], None)), True
3831
 
        elif source_minikind in _fdlt and target_minikind in b'r':
 
3750
            return (entry[0][2],
 
3751
                   (self.utf8_decode(old_path)[0], None),
 
3752
                   True,
 
3753
                   (True, False),
 
3754
                   (parent_id, None),
 
3755
                   (self.utf8_decode(entry[0][1])[0], None),
 
3756
                   (DirState._minikind_to_kind[source_minikind], None),
 
3757
                   (source_details[3], None)), True
 
3758
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3832
3759
            # a rename; could be a true rename, or a rename inherited from
3833
3760
            # a renamed parent. TODO: handle this efficiently. Its not
3834
3761
            # common case to rename dirs though, so a correct but slow
3835
3762
            # implementation will do.
3836
 
            if not osutils.is_inside_any(self.searched_specific_files,
3837
 
                                         target_details[1]):
 
3763
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
3838
3764
                self.search_specific_files.add(target_details[1])
3839
 
        elif source_minikind in _ra and target_minikind in _ra:
 
3765
        elif source_minikind in 'ra' and target_minikind in 'ra':
3840
3766
            # neither of the selected trees contain this file,
3841
3767
            # so skip over it. This is not currently directly tested, but
3842
3768
            # is indirectly via test_too_much.TestCommands.test_conflicts.
3843
3769
            pass
3844
3770
        else:
3845
3771
            raise AssertionError("don't know how to compare "
3846
 
                                 "source_minikind=%r, target_minikind=%r"
3847
 
                                 % (source_minikind, target_minikind))
 
3772
                "source_minikind=%r, target_minikind=%r"
 
3773
                % (source_minikind, target_minikind))
3848
3774
        return None, None
3849
3775
 
3850
3776
    def __iter__(self):
3852
3778
 
3853
3779
    def _gather_result_for_consistency(self, result):
3854
3780
        """Check a result we will yield to make sure we are consistent later.
3855
 
 
 
3781
        
3856
3782
        This gathers result's parents into a set to output later.
3857
3783
 
3858
3784
        :param result: A result tuple.
3859
3785
        """
3860
 
        if not self.partial or not result.file_id:
 
3786
        if not self.partial or not result[0]:
3861
3787
            return
3862
 
        self.seen_ids.add(result.file_id)
3863
 
        new_path = result.path[1]
 
3788
        self.seen_ids.add(result[0])
 
3789
        new_path = result[1][1]
3864
3790
        if new_path:
3865
3791
            # Not the root and not a delete: queue up the parents of the path.
3866
3792
            self.search_specific_file_parents.update(
3867
 
                p.encode('utf8') for p in osutils.parent_directories(new_path))
 
3793
                osutils.parent_directories(new_path.encode('utf8')))
3868
3794
            # Add the root directory which parent_directories does not
3869
3795
            # provide.
3870
 
            self.search_specific_file_parents.add(b'')
 
3796
            self.search_specific_file_parents.add('')
3871
3797
 
3872
3798
    def iter_changes(self):
3873
3799
        """Iterate over the changes."""
3874
3800
        utf8_decode = cache_utf8._utf8_decode
3875
 
        _lt_by_dirs = lt_by_dirs
 
3801
        _cmp_by_dirs = cmp_by_dirs
3876
3802
        _process_entry = self._process_entry
3877
3803
        search_specific_files = self.search_specific_files
3878
3804
        searched_specific_files = self.searched_specific_files
3927
3853
            root_abspath = self.tree.abspath(current_root_unicode)
3928
3854
            try:
3929
3855
                root_stat = os.lstat(root_abspath)
3930
 
            except OSError as e:
 
3856
            except OSError, e:
3931
3857
                if e.errno == errno.ENOENT:
3932
3858
                    # the path does not exist: let _process_entry know that.
3933
3859
                    root_dir_info = None
3935
3861
                    # some other random error: hand it up.
3936
3862
                    raise
3937
3863
            else:
3938
 
                root_dir_info = (b'', current_root,
3939
 
                                 osutils.file_kind_from_stat_mode(
3940
 
                                     root_stat.st_mode), root_stat,
3941
 
                                 root_abspath)
 
3864
                root_dir_info = ('', current_root,
 
3865
                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
 
3866
                    root_abspath)
3942
3867
                if root_dir_info[2] == 'directory':
3943
3868
                    if self.tree._directory_is_tree_reference(
3944
 
                            current_root.decode('utf8')):
 
3869
                        current_root.decode('utf8')):
3945
3870
                        root_dir_info = root_dir_info[:2] + \
3946
3871
                            ('tree-reference',) + root_dir_info[3:]
3947
3872
 
3961
3886
                new_executable = bool(
3962
3887
                    stat.S_ISREG(root_dir_info[3].st_mode)
3963
3888
                    and stat.S_IEXEC & root_dir_info[3].st_mode)
3964
 
                yield TreeChange(
3965
 
                    None,
3966
 
                    (None, current_root_unicode),
3967
 
                    True,
3968
 
                    (False, False),
3969
 
                    (None, None),
3970
 
                    (None, splitpath(current_root_unicode)[-1]),
3971
 
                    (None, root_dir_info[2]),
3972
 
                    (None, new_executable)
3973
 
                    )
3974
 
            initial_key = (current_root, b'', b'')
 
3889
                yield (None,
 
3890
                       (None, current_root_unicode),
 
3891
                       True,
 
3892
                       (False, False),
 
3893
                       (None, None),
 
3894
                       (None, splitpath(current_root_unicode)[-1]),
 
3895
                       (None, root_dir_info[2]),
 
3896
                       (None, new_executable)
 
3897
                      )
 
3898
            initial_key = (current_root, '', '')
3975
3899
            block_index, _ = self.state._find_block_index_from_key(initial_key)
3976
3900
            if block_index == 0:
3977
3901
                # we have processed the total root already, but because the
3978
3902
                # initial key matched it we should skip it here.
3979
 
                block_index += 1
 
3903
                block_index +=1
3980
3904
            if root_dir_info and root_dir_info[2] == 'tree-reference':
3981
3905
                current_dir_info = None
3982
3906
            else:
3983
 
                dir_iterator = osutils._walkdirs_utf8(
3984
 
                    root_abspath, prefix=current_root)
 
3907
                dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)
3985
3908
                try:
3986
 
                    current_dir_info = next(dir_iterator)
3987
 
                except OSError as e:
 
3909
                    current_dir_info = dir_iterator.next()
 
3910
                except OSError, e:
3988
3911
                    # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
3989
3912
                    # python 2.5 has e.errno == EINVAL,
3990
3913
                    #            and e.winerror == ERROR_DIRECTORY
3996
3919
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
3997
3920
                        current_dir_info = None
3998
3921
                    elif (sys.platform == 'win32'
3999
 
                          and (e.errno in win_errors or
4000
 
                               e_winerror in win_errors)):
 
3922
                          and (e.errno in win_errors
 
3923
                               or e_winerror in win_errors)):
4001
3924
                        current_dir_info = None
4002
3925
                    else:
4003
3926
                        raise
4004
3927
                else:
4005
 
                    if current_dir_info[0][0] == b'':
 
3928
                    if current_dir_info[0][0] == '':
4006
3929
                        # remove .bzr from iteration
4007
 
                        bzr_index = bisect.bisect_left(
4008
 
                            current_dir_info[1], (b'.bzr',))
4009
 
                        if current_dir_info[1][bzr_index][0] != b'.bzr':
 
3930
                        bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))
 
3931
                        if current_dir_info[1][bzr_index][0] != '.bzr':
4010
3932
                            raise AssertionError()
4011
3933
                        del current_dir_info[1][bzr_index]
4012
3934
            # walk until both the directory listing and the versioned metadata
4013
3935
            # are exhausted.
4014
3936
            if (block_index < len(self.state._dirblocks) and
4015
 
                osutils.is_inside(current_root,
4016
 
                                  self.state._dirblocks[block_index][0])):
 
3937
                osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
4017
3938
                current_block = self.state._dirblocks[block_index]
4018
3939
            else:
4019
3940
                current_block = None
4020
3941
            while (current_dir_info is not None or
4021
3942
                   current_block is not None):
4022
3943
                if (current_dir_info and current_block
4023
 
                        and current_dir_info[0][0] != current_block[0]):
4024
 
                    if _lt_by_dirs(current_dir_info[0][0], current_block[0]):
 
3944
                    and current_dir_info[0][0] != current_block[0]):
 
3945
                    if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:
4025
3946
                        # filesystem data refers to paths not covered by the dirblock.
4026
3947
                        # this has two possibilities:
4027
3948
                        # A) it is versioned but empty, so there is no block for it
4033
3954
                        # recurse into unknown directories.
4034
3955
                        path_index = 0
4035
3956
                        while path_index < len(current_dir_info[1]):
4036
 
                            current_path_info = current_dir_info[1][path_index]
4037
 
                            if self.want_unversioned:
4038
 
                                if current_path_info[2] == 'directory':
4039
 
                                    if self.tree._directory_is_tree_reference(
 
3957
                                current_path_info = current_dir_info[1][path_index]
 
3958
                                if self.want_unversioned:
 
3959
                                    if current_path_info[2] == 'directory':
 
3960
                                        if self.tree._directory_is_tree_reference(
4040
3961
                                            current_path_info[0].decode('utf8')):
4041
 
                                        current_path_info = current_path_info[:2] + \
4042
 
                                            ('tree-reference',) + \
4043
 
                                            current_path_info[3:]
4044
 
                                new_executable = bool(
4045
 
                                    stat.S_ISREG(current_path_info[3].st_mode)
4046
 
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
4047
 
                                yield TreeChange(
4048
 
                                    None,
4049
 
                                    (None, utf8_decode(current_path_info[0])[0]),
4050
 
                                    True,
4051
 
                                    (False, False),
4052
 
                                    (None, None),
4053
 
                                    (None, utf8_decode(current_path_info[1])[0]),
4054
 
                                    (None, current_path_info[2]),
4055
 
                                    (None, new_executable))
4056
 
                            # dont descend into this unversioned path if it is
4057
 
                            # a dir
4058
 
                            if current_path_info[2] in ('directory',
4059
 
                                                        'tree-reference'):
4060
 
                                del current_dir_info[1][path_index]
4061
 
                                path_index -= 1
4062
 
                            path_index += 1
 
3962
                                            current_path_info = current_path_info[:2] + \
 
3963
                                                ('tree-reference',) + current_path_info[3:]
 
3964
                                    new_executable = bool(
 
3965
                                        stat.S_ISREG(current_path_info[3].st_mode)
 
3966
                                        and stat.S_IEXEC & current_path_info[3].st_mode)
 
3967
                                    yield (None,
 
3968
                                        (None, utf8_decode(current_path_info[0])[0]),
 
3969
                                        True,
 
3970
                                        (False, False),
 
3971
                                        (None, None),
 
3972
                                        (None, utf8_decode(current_path_info[1])[0]),
 
3973
                                        (None, current_path_info[2]),
 
3974
                                        (None, new_executable))
 
3975
                                # dont descend into this unversioned path if it is
 
3976
                                # a dir
 
3977
                                if current_path_info[2] in ('directory',
 
3978
                                                            'tree-reference'):
 
3979
                                    del current_dir_info[1][path_index]
 
3980
                                    path_index -= 1
 
3981
                                path_index += 1
4063
3982
 
4064
3983
                        # This dir info has been handled, go to the next
4065
3984
                        try:
4066
 
                            current_dir_info = next(dir_iterator)
 
3985
                            current_dir_info = dir_iterator.next()
4067
3986
                        except StopIteration:
4068
3987
                            current_dir_info = None
4069
3988
                    else:
4077
3996
                        for current_entry in current_block[1]:
4078
3997
                            # entry referring to file not present on disk.
4079
3998
                            # advance the entry only, after processing.
4080
 
                            result, changed = _process_entry(
4081
 
                                current_entry, None)
 
3999
                            result, changed = _process_entry(current_entry, None)
4082
4000
                            if changed is not None:
4083
4001
                                if changed:
4084
4002
                                    self._gather_result_for_consistency(result)
4085
4003
                                if changed or self.include_unchanged:
4086
4004
                                    yield result
4087
 
                        block_index += 1
 
4005
                        block_index +=1
4088
4006
                        if (block_index < len(self.state._dirblocks) and
4089
4007
                            osutils.is_inside(current_root,
4090
4008
                                              self.state._dirblocks[block_index][0])):
4103
4021
                    current_path_info = current_dir_info[1][path_index]
4104
4022
                    if current_path_info[2] == 'directory':
4105
4023
                        if self.tree._directory_is_tree_reference(
4106
 
                                current_path_info[0].decode('utf8')):
 
4024
                            current_path_info[0].decode('utf8')):
4107
4025
                            current_path_info = current_path_info[:2] + \
4108
4026
                                ('tree-reference',) + current_path_info[3:]
4109
4027
                else:
4111
4029
                advance_path = True
4112
4030
                path_handled = False
4113
4031
                while (current_entry is not None or
4114
 
                       current_path_info is not None):
 
4032
                    current_path_info is not None):
4115
4033
                    if current_entry is None:
4116
4034
                        # the check for path_handled when the path is advanced
4117
4035
                        # will yield this path if needed.
4118
4036
                        pass
4119
4037
                    elif current_path_info is None:
4120
4038
                        # no path is fine: the per entry code will handle it.
4121
 
                        result, changed = _process_entry(
4122
 
                            current_entry, current_path_info)
 
4039
                        result, changed = _process_entry(current_entry, current_path_info)
4123
4040
                        if changed is not None:
4124
4041
                            if changed:
4125
4042
                                self._gather_result_for_consistency(result)
4126
4043
                            if changed or self.include_unchanged:
4127
4044
                                yield result
4128
4045
                    elif (current_entry[0][1] != current_path_info[1]
4129
 
                          or current_entry[1][self.target_index][0] in (b'a', b'r')):
 
4046
                          or current_entry[1][self.target_index][0] in 'ar'):
4130
4047
                        # The current path on disk doesn't match the dirblock
4131
4048
                        # record. Either the dirblock is marked as absent, or
4132
4049
                        # the file on disk is not present at all in the
4142
4059
                        else:
4143
4060
                            # entry referring to file not present on disk.
4144
4061
                            # advance the entry only, after processing.
4145
 
                            result, changed = _process_entry(
4146
 
                                current_entry, None)
 
4062
                            result, changed = _process_entry(current_entry, None)
4147
4063
                            if changed is not None:
4148
4064
                                if changed:
4149
4065
                                    self._gather_result_for_consistency(result)
4151
4067
                                    yield result
4152
4068
                            advance_path = False
4153
4069
                    else:
4154
 
                        result, changed = _process_entry(
4155
 
                            current_entry, current_path_info)
 
4070
                        result, changed = _process_entry(current_entry, current_path_info)
4156
4071
                        if changed is not None:
4157
4072
                            path_handled = True
4158
4073
                            if changed:
4166
4081
                        else:
4167
4082
                            current_entry = None
4168
4083
                    else:
4169
 
                        advance_entry = True  # reset the advance flaga
 
4084
                        advance_entry = True # reset the advance flaga
4170
4085
                    if advance_path and current_path_info is not None:
4171
4086
                        if not path_handled:
4172
4087
                            # unversioned in all regards
4175
4090
                                    stat.S_ISREG(current_path_info[3].st_mode)
4176
4091
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
4177
4092
                                try:
4178
 
                                    relpath_unicode = utf8_decode(
4179
 
                                        current_path_info[0])[0]
 
4093
                                    relpath_unicode = utf8_decode(current_path_info[0])[0]
4180
4094
                                except UnicodeDecodeError:
4181
4095
                                    raise errors.BadFilenameEncoding(
4182
4096
                                        current_path_info[0], osutils._fs_enc)
4183
 
                                yield TreeChange(
4184
 
                                    None,
 
4097
                                yield (None,
4185
4098
                                    (None, relpath_unicode),
4186
4099
                                    True,
4187
4100
                                    (False, False),
4204
4117
                            current_path_info = current_dir_info[1][path_index]
4205
4118
                            if current_path_info[2] == 'directory':
4206
4119
                                if self.tree._directory_is_tree_reference(
4207
 
                                        current_path_info[0].decode('utf8')):
 
4120
                                    current_path_info[0].decode('utf8')):
4208
4121
                                    current_path_info = current_path_info[:2] + \
4209
 
                                        ('tree-reference',) + \
4210
 
                                        current_path_info[3:]
 
4122
                                        ('tree-reference',) + current_path_info[3:]
4211
4123
                        else:
4212
4124
                            current_path_info = None
4213
4125
                        path_handled = False
4214
4126
                    else:
4215
 
                        advance_path = True  # reset the advance flagg.
 
4127
                        advance_path = True # reset the advance flagg.
4216
4128
                if current_block is not None:
4217
4129
                    block_index += 1
4218
4130
                    if (block_index < len(self.state._dirblocks) and
4219
 
                        osutils.is_inside(current_root,
4220
 
                                          self.state._dirblocks[block_index][0])):
 
4131
                        osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
4221
4132
                        current_block = self.state._dirblocks[block_index]
4222
4133
                    else:
4223
4134
                        current_block = None
4224
4135
                if current_dir_info is not None:
4225
4136
                    try:
4226
 
                        current_dir_info = next(dir_iterator)
 
4137
                        current_dir_info = dir_iterator.next()
4227
4138
                    except StopIteration:
4228
4139
                        current_dir_info = None
4229
4140
        for result in self._iter_specific_file_parents():
4251
4162
            found_item = False
4252
4163
            for candidate_entry in path_entries:
4253
4164
                # Find entries present in target at this path:
4254
 
                if candidate_entry[1][self.target_index][0] not in (b'a', b'r'):
 
4165
                if candidate_entry[1][self.target_index][0] not in 'ar':
4255
4166
                    found_item = True
4256
4167
                    selected_entries.append(candidate_entry)
4257
4168
                # Find entries present in source at this path:
4258
4169
                elif (self.source_index is not None and
4259
 
                      candidate_entry[1][self.source_index][0] not in (b'a', b'r')):
 
4170
                    candidate_entry[1][self.source_index][0] not in 'ar'):
4260
4171
                    found_item = True
4261
 
                    if candidate_entry[1][self.target_index][0] == b'a':
 
4172
                    if candidate_entry[1][self.target_index][0] == 'a':
4262
4173
                        # Deleted, emit it here.
4263
4174
                        selected_entries.append(candidate_entry)
4264
4175
                    else:
4269
4180
            if not found_item:
4270
4181
                raise AssertionError(
4271
4182
                    "Missing entry for specific path parent %r, %r" % (
4272
 
                        path_utf8, path_entries))
 
4183
                    path_utf8, path_entries))
4273
4184
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
4274
4185
            for entry in selected_entries:
4275
4186
                if entry[0][2] in self.seen_ids:
4279
4190
                    raise AssertionError(
4280
4191
                        "Got entry<->path mismatch for specific path "
4281
4192
                        "%r entry %r path_info %r " % (
4282
 
                            path_utf8, entry, path_info))
 
4193
                        path_utf8, entry, path_info))
4283
4194
                # Only include changes - we're outside the users requested
4284
4195
                # expansion.
4285
4196
                if changed:
4286
4197
                    self._gather_result_for_consistency(result)
4287
 
                    if (result.kind[0] == 'directory' and
4288
 
                            result.kind[1] != 'directory'):
 
4198
                    if (result[6][0] == 'directory' and
 
4199
                        result[6][1] != 'directory'):
4289
4200
                        # This stopped being a directory, the old children have
4290
4201
                        # to be included.
4291
 
                        if entry[1][self.source_index][0] == b'r':
 
4202
                        if entry[1][self.source_index][0] == 'r':
4292
4203
                            # renamed, take the source path
4293
4204
                            entry_path_utf8 = entry[1][self.source_index][1]
4294
4205
                        else:
4295
4206
                            entry_path_utf8 = path_utf8
4296
 
                        initial_key = (entry_path_utf8, b'', b'')
 
4207
                        initial_key = (entry_path_utf8, '', '')
4297
4208
                        block_index, _ = self.state._find_block_index_from_key(
4298
4209
                            initial_key)
4299
4210
                        if block_index == 0:
4300
4211
                            # The children of the root are in block index 1.
4301
 
                            block_index += 1
 
4212
                            block_index +=1
4302
4213
                        current_block = None
4303
4214
                        if block_index < len(self.state._dirblocks):
4304
4215
                            current_block = self.state._dirblocks[block_index]
4305
4216
                            if not osutils.is_inside(
4306
 
                                    entry_path_utf8, current_block[0]):
 
4217
                                entry_path_utf8, current_block[0]):
4307
4218
                                # No entries for this directory at all.
4308
4219
                                current_block = None
4309
4220
                        if current_block is not None:
4310
4221
                            for entry in current_block[1]:
4311
 
                                if entry[1][self.source_index][0] in (b'a', b'r'):
 
4222
                                if entry[1][self.source_index][0] in 'ar':
4312
4223
                                    # Not in the source tree, so doesn't have to be
4313
4224
                                    # included.
4314
4225
                                    continue
4328
4239
        abspath = self.tree.abspath(unicode_path)
4329
4240
        try:
4330
4241
            stat = os.lstat(abspath)
4331
 
        except OSError as e:
 
4242
        except OSError, e:
4332
4243
            if e.errno == errno.ENOENT:
4333
4244
                # the path does not exist.
4334
4245
                return None
4335
4246
            else:
4336
4247
                raise
4337
 
        utf8_basename = utf8_path.rsplit(b'/', 1)[-1]
 
4248
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
4338
4249
        dir_info = (utf8_path, utf8_basename,
4339
 
                    osutils.file_kind_from_stat_mode(stat.st_mode), stat,
4340
 
                    abspath)
 
4250
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
4251
            abspath)
4341
4252
        if dir_info[2] == 'directory':
4342
4253
            if self.tree._directory_is_tree_reference(
4343
 
                    unicode_path):
 
4254
                unicode_path):
4344
4255
                self.root_dir_info = self.root_dir_info[:2] + \
4345
4256
                    ('tree-reference',) + self.root_dir_info[3:]
4346
4257
        return dir_info
4348
4259
 
4349
4260
# Try to load the compiled form if possible
4350
4261
try:
4351
 
    from ._dirstate_helpers_pyx import (
 
4262
    from brzlib._dirstate_helpers_pyx import (
4352
4263
        _read_dirblocks,
4353
4264
        bisect_dirblock,
4354
4265
        _bisect_path_left,
4355
4266
        _bisect_path_right,
4356
 
        lt_by_dirs,
 
4267
        cmp_by_dirs,
4357
4268
        pack_stat,
4358
4269
        ProcessEntryC as _process_entry,
4359
4270
        update_entry as update_entry,
4360
4271
        )
4361
 
except ImportError as e:
 
4272
except ImportError, e:
4362
4273
    osutils.failed_to_load_extension(e)
4363
 
    from ._dirstate_helpers_py import (
 
4274
    from brzlib._dirstate_helpers_py import (
4364
4275
        _read_dirblocks,
4365
4276
        bisect_dirblock,
4366
4277
        _bisect_path_left,
4367
4278
        _bisect_path_right,
4368
 
        lt_by_dirs,
 
4279
        cmp_by_dirs,
4369
4280
        pack_stat,
4370
4281
        )
4371
4282
    # FIXME: It would be nice to be able to track moved lines so that the