/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Robert Collins
  • Date: 2010-05-06 11:08:10 UTC
  • mto: This revision was merged to the branch mainline in revision 5223.
  • Revision ID: robertc@robertcollins.net-20100506110810-h3j07fh5gmw54s25
Cleaner matcher matching revised unlocking protocol.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2006, 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2006-2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
204
204
import bisect
205
205
import binascii
206
206
import errno
 
207
import operator
207
208
import os
208
209
from stat import S_IEXEC
209
210
import stat
1277
1278
    def update_by_delta(self, delta):
1278
1279
        """Apply an inventory delta to the dirstate for tree 0
1279
1280
 
 
1281
        This is the workhorse for apply_inventory_delta in dirstate based
 
1282
        trees.
 
1283
 
1280
1284
        :param delta: An inventory delta.  See Inventory.apply_delta for
1281
1285
            details.
1282
1286
        """
1283
1287
        self._read_dirblocks_if_needed()
 
1288
        encode = cache_utf8.encode
1284
1289
        insertions = {}
1285
1290
        removals = {}
1286
 
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
 
1291
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1292
        # items or items placed under files/links/tree-references. We get
 
1293
        # references from every item in the delta that is not a deletion and
 
1294
        # is not itself the root.
 
1295
        parents = set()
 
1296
        # Added ids must not be in the dirstate already. This set holds those
 
1297
        # ids.
 
1298
        new_ids = set()
 
1299
        # This loop transforms the delta to single atomic operations that can
 
1300
        # be executed and validated.
 
1301
        for old_path, new_path, file_id, inv_entry in sorted(
 
1302
            inventory._check_delta_unique_old_paths(
 
1303
            inventory._check_delta_unique_new_paths(
 
1304
            inventory._check_delta_ids_match_entry(
 
1305
            inventory._check_delta_ids_are_valid(
 
1306
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
 
1307
            reverse=True):
1287
1308
            if (file_id in insertions) or (file_id in removals):
1288
 
                raise AssertionError("repeated file id in delta %r" % (file_id,))
 
1309
                raise errors.InconsistentDelta(old_path or new_path, file_id,
 
1310
                    "repeated file_id")
1289
1311
            if old_path is not None:
1290
1312
                old_path = old_path.encode('utf-8')
1291
1313
                removals[file_id] = old_path
 
1314
            else:
 
1315
                new_ids.add(file_id)
1292
1316
            if new_path is not None:
 
1317
                if inv_entry is None:
 
1318
                    raise errors.InconsistentDelta(new_path, file_id,
 
1319
                        "new_path with no entry")
1293
1320
                new_path = new_path.encode('utf-8')
1294
 
                dirname, basename = osutils.split(new_path)
1295
 
                key = (dirname, basename, file_id)
 
1321
                dirname_utf8, basename = osutils.split(new_path)
 
1322
                if basename:
 
1323
                    parents.add((dirname_utf8, inv_entry.parent_id))
 
1324
                key = (dirname_utf8, basename, file_id)
1296
1325
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1297
1326
                if minikind == 't':
1298
 
                    fingerprint = inv_entry.reference_revision
 
1327
                    fingerprint = inv_entry.reference_revision or ''
1299
1328
                else:
1300
1329
                    fingerprint = ''
1301
1330
                insertions[file_id] = (key, minikind, inv_entry.executable,
1310
1339
                    minikind = child[1][0][0]
1311
1340
                    fingerprint = child[1][0][4]
1312
1341
                    executable = child[1][0][3]
1313
 
                    old_child_path = osutils.pathjoin(child[0][0],
1314
 
                                                      child[0][1])
 
1342
                    old_child_path = osutils.pathjoin(child_dirname,
 
1343
                                                      child_basename)
1315
1344
                    removals[child[0][2]] = old_child_path
1316
1345
                    child_suffix = child_dirname[len(old_path):]
1317
1346
                    new_child_dirname = (new_path + child_suffix)
1318
1347
                    key = (new_child_dirname, child_basename, child[0][2])
1319
 
                    new_child_path = os.path.join(new_child_dirname,
1320
 
                                                  child_basename)
 
1348
                    new_child_path = osutils.pathjoin(new_child_dirname,
 
1349
                                                      child_basename)
1321
1350
                    insertions[child[0][2]] = (key, minikind, executable,
1322
1351
                                               fingerprint, new_child_path)
1323
 
        self._apply_removals(removals.values())
1324
 
        self._apply_insertions(insertions.values())
 
1352
        self._check_delta_ids_absent(new_ids, delta, 0)
 
1353
        try:
 
1354
            self._apply_removals(removals.iteritems())
 
1355
            self._apply_insertions(insertions.values())
 
1356
            # Validate parents
 
1357
            self._after_delta_check_parents(parents, 0)
 
1358
        except errors.BzrError, e:
 
1359
            self._changes_aborted = True
 
1360
            if 'integrity error' not in str(e):
 
1361
                raise
 
1362
            # _get_entry raises BzrError when a request is inconsistent; we
 
1363
            # want such errors to be shown as InconsistentDelta - and that 
 
1364
            # fits the behaviour we trigger.
 
1365
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1325
1366
 
1326
1367
    def _apply_removals(self, removals):
1327
 
        for path in sorted(removals, reverse=True):
 
1368
        for file_id, path in sorted(removals, reverse=True,
 
1369
            key=operator.itemgetter(1)):
1328
1370
            dirname, basename = osutils.split(path)
1329
1371
            block_i, entry_i, d_present, f_present = \
1330
1372
                self._get_block_entry_index(dirname, basename, 0)
1331
 
            entry = self._dirblocks[block_i][1][entry_i]
 
1373
            try:
 
1374
                entry = self._dirblocks[block_i][1][entry_i]
 
1375
            except IndexError:
 
1376
                self._changes_aborted = True
 
1377
                raise errors.InconsistentDelta(path, file_id,
 
1378
                    "Wrong path for old path.")
 
1379
            if not f_present or entry[1][0][0] in 'ar':
 
1380
                self._changes_aborted = True
 
1381
                raise errors.InconsistentDelta(path, file_id,
 
1382
                    "Wrong path for old path.")
 
1383
            if file_id != entry[0][2]:
 
1384
                self._changes_aborted = True
 
1385
                raise errors.InconsistentDelta(path, file_id,
 
1386
                    "Attempt to remove path has wrong id - found %r."
 
1387
                    % entry[0][2])
1332
1388
            self._make_absent(entry)
1333
1389
            # See if we have a malformed delta: deleting a directory must not
1334
1390
            # leave crud behind. This increases the number of bisects needed
1342
1398
                # be due to it being in a parent tree, or a corrupt delta.
1343
1399
                for child_entry in self._dirblocks[block_i][1]:
1344
1400
                    if child_entry[1][0][0] not in ('r', 'a'):
 
1401
                        self._changes_aborted = True
1345
1402
                        raise errors.InconsistentDelta(path, entry[0][2],
1346
1403
                            "The file id was deleted but its children were "
1347
1404
                            "not deleted.")
1348
1405
 
1349
1406
    def _apply_insertions(self, adds):
1350
 
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1351
 
            self.update_minimal(key, minikind, executable, fingerprint,
1352
 
                                path_utf8=path_utf8)
 
1407
        try:
 
1408
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1409
                self.update_minimal(key, minikind, executable, fingerprint,
 
1410
                                    path_utf8=path_utf8)
 
1411
        except errors.NotVersionedError:
 
1412
            self._changes_aborted = True
 
1413
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
 
1414
                "Missing parent")
1353
1415
 
1354
1416
    def update_basis_by_delta(self, delta, new_revid):
1355
1417
        """Update the parents of this tree after a commit.
1399
1461
        # At the same time, to reduce interface friction we convert the input
1400
1462
        # inventory entries to dirstate.
1401
1463
        root_only = ('', '')
 
1464
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1465
        # items or items placed under files/links/tree-references. We get
 
1466
        # references from every item in the delta that is not a deletion and
 
1467
        # is not itself the root.
 
1468
        parents = set()
 
1469
        # Added ids must not be in the dirstate already. This set holds those
 
1470
        # ids.
 
1471
        new_ids = set()
1402
1472
        for old_path, new_path, file_id, inv_entry in delta:
 
1473
            if inv_entry is not None and file_id != inv_entry.file_id:
 
1474
                raise errors.InconsistentDelta(new_path, file_id,
 
1475
                    "mismatched entry file_id %r" % inv_entry)
 
1476
            if new_path is not None:
 
1477
                if inv_entry is None:
 
1478
                    raise errors.InconsistentDelta(new_path, file_id,
 
1479
                        "new_path with no entry")
 
1480
                new_path_utf8 = encode(new_path)
 
1481
                # note the parent for validation
 
1482
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
 
1483
                if basename_utf8:
 
1484
                    parents.add((dirname_utf8, inv_entry.parent_id))
1403
1485
            if old_path is None:
1404
1486
                adds.append((None, encode(new_path), file_id,
1405
1487
                    inv_to_entry(inv_entry), True))
 
1488
                new_ids.add(file_id)
1406
1489
            elif new_path is None:
1407
1490
                deletes.append((encode(old_path), None, file_id, None, True))
1408
1491
            elif (old_path, new_path) != root_only:
1420
1503
                # for 'r' items on every pass.
1421
1504
                self._update_basis_apply_deletes(deletes)
1422
1505
                deletes = []
1423
 
                new_path_utf8 = encode(new_path)
1424
1506
                # Split into an add/delete pair recursively.
1425
1507
                adds.append((None, new_path_utf8, file_id,
1426
1508
                    inv_to_entry(inv_entry), False))
1452
1534
                # of everything.
1453
1535
                changes.append((encode(old_path), encode(new_path), file_id,
1454
1536
                    inv_to_entry(inv_entry)))
1455
 
 
1456
 
        # Finish expunging deletes/first half of renames.
1457
 
        self._update_basis_apply_deletes(deletes)
1458
 
        # Reinstate second half of renames and new paths.
1459
 
        self._update_basis_apply_adds(adds)
1460
 
        # Apply in-situ changes.
1461
 
        self._update_basis_apply_changes(changes)
 
1537
        self._check_delta_ids_absent(new_ids, delta, 1)
 
1538
        try:
 
1539
            # Finish expunging deletes/first half of renames.
 
1540
            self._update_basis_apply_deletes(deletes)
 
1541
            # Reinstate second half of renames and new paths.
 
1542
            self._update_basis_apply_adds(adds)
 
1543
            # Apply in-situ changes.
 
1544
            self._update_basis_apply_changes(changes)
 
1545
            # Validate parents
 
1546
            self._after_delta_check_parents(parents, 1)
 
1547
        except errors.BzrError, e:
 
1548
            self._changes_aborted = True
 
1549
            if 'integrity error' not in str(e):
 
1550
                raise
 
1551
            # _get_entry raises BzrError when a request is inconsistent; we
 
1552
            # want such errors to be shown as InconsistentDelta - and that 
 
1553
            # fits the behaviour we trigger. Part of this is driven by dirstate
 
1554
            # only supporting deltas that turn the basis into a closer fit to
 
1555
            # the active tree.
 
1556
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1462
1557
 
1463
1558
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1464
1559
        self._header_state = DirState.IN_MEMORY_MODIFIED
1465
1560
        self._id_index = None
1466
1561
        return
1467
1562
 
 
1563
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
 
1564
        """Check that none of the file_ids in new_ids are present in a tree."""
 
1565
        if not new_ids:
 
1566
            return
 
1567
        id_index = self._get_id_index()
 
1568
        for file_id in new_ids:
 
1569
            for key in id_index.get(file_id, []):
 
1570
                block_i, entry_i, d_present, f_present = \
 
1571
                    self._get_block_entry_index(key[0], key[1], tree_index)
 
1572
                if not f_present:
 
1573
                    # In a different tree
 
1574
                    continue
 
1575
                entry = self._dirblocks[block_i][1][entry_i]
 
1576
                if entry[0][2] != file_id:
 
1577
                    # Different file_id, so not what we want.
 
1578
                    continue
 
1579
                # NB: No changes made before this helper is called, so no need
 
1580
                # to set the _changes_aborted flag.
 
1581
                raise errors.InconsistentDelta(
 
1582
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
 
1583
                    "This file_id is new in the delta but already present in "
 
1584
                    "the target")
 
1585
 
1468
1586
    def _update_basis_apply_adds(self, adds):
1469
1587
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1470
1588
 
1535
1653
        null = DirState.NULL_PARENT_DETAILS
1536
1654
        for old_path, new_path, file_id, _, real_delete in deletes:
1537
1655
            if real_delete != (new_path is None):
 
1656
                self._changes_aborted = True
1538
1657
                raise AssertionError("bad delete delta")
1539
1658
            # the entry for this file_id must be in tree 1.
1540
1659
            dirname, basename = osutils.split(old_path)
1573
1692
                    # it is being resurrected here, so blank it out temporarily.
1574
1693
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1575
1694
 
 
1695
    def _after_delta_check_parents(self, parents, index):
 
1696
        """Check that parents required by the delta are all intact.
 
1697
        
 
1698
        :param parents: An iterable of (path_utf8, file_id) tuples which are
 
1699
            required to be present in tree 'index' at path_utf8 with id file_id
 
1700
            and be a directory.
 
1701
        :param index: The column in the dirstate to check for parents in.
 
1702
        """
 
1703
        for dirname_utf8, file_id in parents:
 
1704
            # Get the entry - this ensures that file_id, dirname_utf8 exists and
 
1705
            # has the right file id.
 
1706
            entry = self._get_entry(index, file_id, dirname_utf8)
 
1707
            if entry[1] is None:
 
1708
                self._changes_aborted = True
 
1709
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1710
                    file_id, "This parent is not present.")
 
1711
            # Parents of things must be directories
 
1712
            if entry[1][index][0] != 'd':
 
1713
                self._changes_aborted = True
 
1714
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1715
                    file_id, "This parent is not a directory.")
 
1716
 
1576
1717
    def _observed_sha1(self, entry, sha1, stat_value,
1577
1718
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1578
1719
        """Note the sha1 of a file.
1821
1962
        self._read_dirblocks_if_needed()
1822
1963
        if path_utf8 is not None:
1823
1964
            if type(path_utf8) is not str:
1824
 
                raise AssertionError('path_utf8 is not a str: %s %s'
 
1965
                raise errors.BzrError('path_utf8 is not a str: %s %r'
1825
1966
                    % (type(path_utf8), path_utf8))
1826
1967
            # path lookups are faster
1827
1968
            dirname, basename = osutils.split(path_utf8)
1856
1997
                entry_index, present = self._find_entry_index(key, block)
1857
1998
                if present:
1858
1999
                    entry = self._dirblocks[block_index][1][entry_index]
 
2000
                    # TODO: We might want to assert that entry[0][2] ==
 
2001
                    #       fileid_utf8.
1859
2002
                    if entry[1][tree_index][0] in 'fdlt':
1860
2003
                        # this is the result we are looking for: the
1861
2004
                        # real home of this file_id in this tree.
2213
2356
        self.update_minimal(('', '', new_id), 'd',
2214
2357
            path_utf8='', packed_stat=entry[1][0][4])
2215
2358
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2216
 
        if self._id_index is not None:
2217
 
            self._id_index.setdefault(new_id, set()).add(entry[0])
2218
2359
 
2219
2360
    def set_parent_trees(self, trees, ghosts):
2220
2361
        """Set the parent trees for the dirstate.
2379
2520
        if 'evil' in debug.debug_flags:
2380
2521
            trace.mutter_callsite(1,
2381
2522
                "set_state_from_inventory called; please mutate the tree instead")
 
2523
        tracing = 'dirstate' in debug.debug_flags
 
2524
        if tracing:
 
2525
            trace.mutter("set_state_from_inventory trace:")
2382
2526
        self._read_dirblocks_if_needed()
2383
2527
        # sketch:
2384
2528
        # Two iterators: current data and new data, both in dirblock order.
2393
2537
        new_iterator = new_inv.iter_entries_by_dir()
2394
2538
        # we will be modifying the dirstate, so we need a stable iterator. In
2395
2539
        # future we might write one, for now we just clone the state into a
2396
 
        # list - which is a shallow copy.
 
2540
        # list using a copy so that we see every original item and don't have
 
2541
        # to adjust the position when items are inserted or deleted in the
 
2542
        # underlying dirstate.
2397
2543
        old_iterator = iter(list(self._iter_entries()))
2398
2544
        # both must have roots so this is safe:
2399
2545
        current_new = new_iterator.next()
2433
2579
            # we make both end conditions explicit
2434
2580
            if not current_old:
2435
2581
                # old is finished: insert current_new into the state.
 
2582
                if tracing:
 
2583
                    trace.mutter("Appending from new '%s'.",
 
2584
                        new_path_utf8.decode('utf8'))
2436
2585
                self.update_minimal(new_entry_key, current_new_minikind,
2437
2586
                    executable=current_new[1].executable,
2438
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2587
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2588
                    fullscan=True)
2439
2589
                current_new = advance(new_iterator)
2440
2590
            elif not current_new:
2441
2591
                # new is finished
 
2592
                if tracing:
 
2593
                    trace.mutter("Truncating from old '%s/%s'.",
 
2594
                        current_old[0][0].decode('utf8'),
 
2595
                        current_old[0][1].decode('utf8'))
2442
2596
                self._make_absent(current_old)
2443
2597
                current_old = advance(old_iterator)
2444
2598
            elif new_entry_key == current_old[0]:
2451
2605
                # kind has changed.
2452
2606
                if (current_old[1][0][3] != current_new[1].executable or
2453
2607
                    current_old[1][0][0] != current_new_minikind):
 
2608
                    if tracing:
 
2609
                        trace.mutter("Updating in-place change '%s'.",
 
2610
                            new_path_utf8.decode('utf8'))
2454
2611
                    self.update_minimal(current_old[0], current_new_minikind,
2455
2612
                        executable=current_new[1].executable,
2456
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2613
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2614
                        fullscan=True)
2457
2615
                # both sides are dealt with, move on
2458
2616
                current_old = advance(old_iterator)
2459
2617
                current_new = advance(new_iterator)
2462
2620
                      and new_entry_key[1:] < current_old[0][1:])):
2463
2621
                # new comes before:
2464
2622
                # add an entry for this and advance new
 
2623
                if tracing:
 
2624
                    trace.mutter("Inserting from new '%s'.",
 
2625
                        new_path_utf8.decode('utf8'))
2465
2626
                self.update_minimal(new_entry_key, current_new_minikind,
2466
2627
                    executable=current_new[1].executable,
2467
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2628
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2629
                    fullscan=True)
2468
2630
                current_new = advance(new_iterator)
2469
2631
            else:
2470
2632
                # we've advanced past the place where the old key would be,
2471
2633
                # without seeing it in the new list.  so it must be gone.
 
2634
                if tracing:
 
2635
                    trace.mutter("Deleting from old '%s/%s'.",
 
2636
                        current_old[0][0].decode('utf8'),
 
2637
                        current_old[0][1].decode('utf8'))
2472
2638
                self._make_absent(current_old)
2473
2639
                current_old = advance(old_iterator)
2474
2640
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2475
2641
        self._id_index = None
2476
2642
        self._packed_stat_index = None
 
2643
        if tracing:
 
2644
            trace.mutter("set_state_from_inventory complete.")
2477
2645
 
2478
2646
    def _make_absent(self, current_old):
2479
2647
        """Mark current_old - an entry - as absent for tree 0.
2528
2696
        return last_reference
2529
2697
 
2530
2698
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2531
 
                       packed_stat=None, size=0, path_utf8=None):
 
2699
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
2532
2700
        """Update an entry to the state in tree 0.
2533
2701
 
2534
2702
        This will either create a new entry at 'key' or update an existing one.
2545
2713
        :param size: Size information for new entry
2546
2714
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2547
2715
                extra computation.
 
2716
        :param fullscan: If True then a complete scan of the dirstate is being
 
2717
            done and checking for duplicate rows should not be done. This
 
2718
            should only be set by set_state_from_inventory and similar methods.
2548
2719
 
2549
2720
        If packed_stat and fingerprint are not given, they're invalidated in
2550
2721
        the entry.
2559
2730
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2560
2731
        id_index = self._get_id_index()
2561
2732
        if not present:
 
2733
            # New record. Check there isn't an entry at this path already.
 
2734
            if not fullscan:
 
2735
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
 
2736
                while low_index < len(block):
 
2737
                    entry = block[low_index]
 
2738
                    if entry[0][0:2] == key[0:2]:
 
2739
                        if entry[1][0][0] not in 'ar':
 
2740
                            # This entry has the same path (but a different id) as
 
2741
                            # the new entry we're adding, and is present in this
 
2742
                            # tree.
 
2743
                            raise errors.InconsistentDelta(
 
2744
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
 
2745
                                "Attempt to add item at path already occupied by "
 
2746
                                "id %r" % entry[0][2])
 
2747
                        low_index += 1
 
2748
                    else:
 
2749
                        break
2562
2750
            # new entry, synthesis cross reference here,
2563
2751
            existing_keys = id_index.setdefault(key[2], set())
2564
2752
            if not existing_keys:
2569
2757
                # grab one of them and use it to generate parent
2570
2758
                # relocation/absent entries.
2571
2759
                new_entry = key, [new_details]
2572
 
                for other_key in existing_keys:
 
2760
                # existing_keys can be changed as we iterate.
 
2761
                for other_key in tuple(existing_keys):
2573
2762
                    # change the record at other to be a pointer to this new
2574
2763
                    # record. The loop looks similar to the change to
2575
2764
                    # relocations when updating an existing record but it's not:
2576
2765
                    # the test for existing kinds is different: this can be
2577
2766
                    # factored out to a helper though.
2578
 
                    other_block_index, present = self._find_block_index_from_key(other_key)
2579
 
                    if not present:
2580
 
                        raise AssertionError('could not find block for %s' % (other_key,))
2581
 
                    other_entry_index, present = self._find_entry_index(other_key,
2582
 
                                            self._dirblocks[other_block_index][1])
2583
 
                    if not present:
2584
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2767
                    other_block_index, present = self._find_block_index_from_key(
 
2768
                        other_key)
 
2769
                    if not present:
 
2770
                        raise AssertionError('could not find block for %s' % (
 
2771
                            other_key,))
 
2772
                    other_block = self._dirblocks[other_block_index][1]
 
2773
                    other_entry_index, present = self._find_entry_index(
 
2774
                        other_key, other_block)
 
2775
                    if not present:
 
2776
                        raise AssertionError(
 
2777
                            'update_minimal: could not find other entry for %s'
 
2778
                            % (other_key,))
2585
2779
                    if path_utf8 is None:
2586
2780
                        raise AssertionError('no path')
2587
 
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2588
 
                        ('r', path_utf8, 0, False, '')
 
2781
                    # Turn this other location into a reference to the new
 
2782
                    # location. This also updates the aliased iterator
 
2783
                    # (current_old in set_state_from_inventory) so that the old
 
2784
                    # entry, if not already examined, is skipped over by that
 
2785
                    # loop.
 
2786
                    other_entry = other_block[other_entry_index]
 
2787
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
 
2788
                    self._maybe_remove_row(other_block, other_entry_index,
 
2789
                        id_index)
2589
2790
 
 
2791
                # This loop:
 
2792
                # adds a tuple to the new details for each column
 
2793
                #  - either by copying an existing relocation pointer inside that column
 
2794
                #  - or by creating a new pointer to the right row inside that column
2590
2795
                num_present_parents = self._num_present_parents()
 
2796
                if num_present_parents:
 
2797
                    other_key = list(existing_keys)[0]
2591
2798
                for lookup_index in xrange(1, num_present_parents + 1):
2592
2799
                    # grab any one entry, use it to find the right path.
2593
2800
                    # TODO: optimise this to reduce memory use in highly
2600
2807
                    update_entry_index, present = \
2601
2808
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2602
2809
                    if not present:
2603
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2810
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
2604
2811
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2605
2812
                    if update_details[0] in 'ar': # relocated, absent
2606
2813
                        # it's a pointer or absent in lookup_index's tree, use
2652
2859
 
2653
2860
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2654
2861
 
 
2862
    def _maybe_remove_row(self, block, index, id_index):
 
2863
        """Remove index if it is absent or relocated across the row.
 
2864
        
 
2865
        id_index is updated accordingly.
 
2866
        """
 
2867
        present_in_row = False
 
2868
        entry = block[index]
 
2869
        for column in entry[1]:
 
2870
            if column[0] not in 'ar':
 
2871
                present_in_row = True
 
2872
                break
 
2873
        if not present_in_row:
 
2874
            block.pop(index)
 
2875
            id_index[entry[0][2]].remove(entry[0])
 
2876
 
2655
2877
    def _validate(self):
2656
2878
        """Check that invariants on the dirblock are correct.
2657
2879
 
2791
3013
            if absent_positions == tree_count:
2792
3014
                raise AssertionError(
2793
3015
                    "entry %r has no data for any tree." % (entry,))
 
3016
        if self._id_index is not None:
 
3017
            for file_id, entry_keys in self._id_index.iteritems():
 
3018
                for entry_key in entry_keys:
 
3019
                    if entry_key[2] != file_id:
 
3020
                        raise AssertionError(
 
3021
                            'file_id %r did not match entry key %s'
 
3022
                            % (file_id, entry_key))
2794
3023
 
2795
3024
    def _wipe_state(self):
2796
3025
        """Forget all state information about the dirstate."""
2942
3171
 
2943
3172
class ProcessEntryPython(object):
2944
3173
 
2945
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
3174
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2946
3175
        "last_source_parent", "last_target_parent", "include_unchanged",
2947
 
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2948
 
        "search_specific_files", "state", "source_index", "target_index",
2949
 
        "want_unversioned", "tree"]
 
3176
        "partial", "use_filesystem_for_exec", "utf8_decode",
 
3177
        "searched_specific_files", "search_specific_files",
 
3178
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
 
3179
        "state", "source_index", "target_index", "want_unversioned", "tree"]
2950
3180
 
2951
3181
    def __init__(self, include_unchanged, use_filesystem_for_exec,
2952
3182
        search_specific_files, state, source_index, target_index,
2953
3183
        want_unversioned, tree):
2954
3184
        self.old_dirname_to_file_id = {}
2955
3185
        self.new_dirname_to_file_id = {}
2956
 
        # Just a sentry, so that _process_entry can say that this
2957
 
        # record is handled, but isn't interesting to process (unchanged)
2958
 
        self.uninteresting = object()
 
3186
        # Are we doing a partial iter_changes?
 
3187
        self.partial = search_specific_files != set([''])
2959
3188
        # Using a list so that we can access the values and change them in
2960
3189
        # nested scope. Each one is [path, file_id, entry]
2961
3190
        self.last_source_parent = [None, None]
2964
3193
        self.use_filesystem_for_exec = use_filesystem_for_exec
2965
3194
        self.utf8_decode = cache_utf8._utf8_decode
2966
3195
        # for all search_indexs in each path at or under each element of
2967
 
        # search_specific_files, if the detail is relocated: add the id, and add the
2968
 
        # relocated path as one to search if its not searched already. If the
2969
 
        # detail is not relocated, add the id.
 
3196
        # search_specific_files, if the detail is relocated: add the id, and
 
3197
        # add the relocated path as one to search if its not searched already.
 
3198
        # If the detail is not relocated, add the id.
2970
3199
        self.searched_specific_files = set()
 
3200
        # When we search exact paths without expanding downwards, we record
 
3201
        # that here.
 
3202
        self.searched_exact_paths = set()
2971
3203
        self.search_specific_files = search_specific_files
 
3204
        # The parents up to the root of the paths we are searching.
 
3205
        # After all normal paths are returned, these specific items are returned.
 
3206
        self.search_specific_file_parents = set()
 
3207
        # The ids we've sent out in the delta.
 
3208
        self.seen_ids = set()
2972
3209
        self.state = state
2973
3210
        self.source_index = source_index
2974
3211
        self.target_index = target_index
 
3212
        if target_index != 0:
 
3213
            # A lot of code in here depends on target_index == 0
 
3214
            raise errors.BzrError('unsupported target index')
2975
3215
        self.want_unversioned = want_unversioned
2976
3216
        self.tree = tree
2977
3217
 
2979
3219
        """Compare an entry and real disk to generate delta information.
2980
3220
 
2981
3221
        :param path_info: top_relpath, basename, kind, lstat, abspath for
2982
 
            the path of entry. If None, then the path is considered absent.
2983
 
            (Perhaps we should pass in a concrete entry for this ?)
 
3222
            the path of entry. If None, then the path is considered absent in 
 
3223
            the target (Perhaps we should pass in a concrete entry for this ?)
2984
3224
            Basename is returned as a utf8 string because we expect this
2985
3225
            tuple will be ignored, and don't want to take the time to
2986
3226
            decode.
2987
 
        :return: None if these don't match
2988
 
                 A tuple of information about the change, or
2989
 
                 the object 'uninteresting' if these match, but are
2990
 
                 basically identical.
 
3227
        :return: (iter_changes_result, changed). If the entry has not been
 
3228
            handled then changed is None. Otherwise it is False if no content
 
3229
            or metadata changes have occurred, and True if any content or
 
3230
            metadata change has occurred. If self.include_unchanged is True then
 
3231
            if changed is not None, iter_changes_result will always be a result
 
3232
            tuple. Otherwise, iter_changes_result is None unless changed is
 
3233
            True.
2991
3234
        """
2992
3235
        if self.source_index is None:
2993
3236
            source_details = DirState.NULL_PARENT_DETAILS
3092
3335
                        content_change = False
3093
3336
                    target_exec = False
3094
3337
                else:
3095
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
3338
                    if path is None:
 
3339
                        path = pathjoin(old_dirname, old_basename)
 
3340
                    raise errors.BadFileKindError(path, path_info[2])
3096
3341
            if source_minikind == 'd':
3097
3342
                if path is None:
3098
3343
                    old_path = path = pathjoin(old_dirname, old_basename)
3099
3344
                self.old_dirname_to_file_id[old_path] = file_id
3100
3345
            # parent id is the entry for the path in the target tree
3101
 
            if old_dirname == self.last_source_parent[0]:
 
3346
            if old_basename and old_dirname == self.last_source_parent[0]:
3102
3347
                source_parent_id = self.last_source_parent[1]
3103
3348
            else:
3104
3349
                try:
3114
3359
                    self.last_source_parent[0] = old_dirname
3115
3360
                    self.last_source_parent[1] = source_parent_id
3116
3361
            new_dirname = entry[0][0]
3117
 
            if new_dirname == self.last_target_parent[0]:
 
3362
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
3118
3363
                target_parent_id = self.last_target_parent[1]
3119
3364
            else:
3120
3365
                try:
3137
3382
                    self.last_target_parent[1] = target_parent_id
3138
3383
 
3139
3384
            source_exec = source_details[3]
3140
 
            if (self.include_unchanged
3141
 
                or content_change
 
3385
            changed = (content_change
3142
3386
                or source_parent_id != target_parent_id
3143
3387
                or old_basename != entry[0][1]
3144
3388
                or source_exec != target_exec
3145
 
                ):
 
3389
                )
 
3390
            if not changed and not self.include_unchanged:
 
3391
                return None, False
 
3392
            else:
3146
3393
                if old_path is None:
3147
3394
                    old_path = path = pathjoin(old_dirname, old_basename)
3148
3395
                    old_path_u = self.utf8_decode(old_path)[0]
3161
3408
                       (source_parent_id, target_parent_id),
3162
3409
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3163
3410
                       (source_kind, target_kind),
3164
 
                       (source_exec, target_exec))
3165
 
            else:
3166
 
                return self.uninteresting
 
3411
                       (source_exec, target_exec)), changed
3167
3412
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3168
3413
            # looks like a new file
3169
3414
            path = pathjoin(entry[0][0], entry[0][1])
3190
3435
                       (None, parent_id),
3191
3436
                       (None, self.utf8_decode(entry[0][1])[0]),
3192
3437
                       (None, path_info[2]),
3193
 
                       (None, target_exec))
 
3438
                       (None, target_exec)), True
3194
3439
            else:
3195
3440
                # Its a missing file, report it as such.
3196
3441
                return (entry[0][2],
3200
3445
                       (None, parent_id),
3201
3446
                       (None, self.utf8_decode(entry[0][1])[0]),
3202
3447
                       (None, None),
3203
 
                       (None, False))
 
3448
                       (None, False)), True
3204
3449
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3205
3450
            # unversioned, possibly, or possibly not deleted: we dont care.
3206
3451
            # if its still on disk, *and* theres no other entry at this
3218
3463
                   (parent_id, None),
3219
3464
                   (self.utf8_decode(entry[0][1])[0], None),
3220
3465
                   (DirState._minikind_to_kind[source_minikind], None),
3221
 
                   (source_details[3], None))
 
3466
                   (source_details[3], None)), True
3222
3467
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3223
3468
            # a rename; could be a true rename, or a rename inherited from
3224
3469
            # a renamed parent. TODO: handle this efficiently. Its not
3236
3481
                "source_minikind=%r, target_minikind=%r"
3237
3482
                % (source_minikind, target_minikind))
3238
3483
            ## import pdb;pdb.set_trace()
3239
 
        return None
 
3484
        return None, None
3240
3485
 
3241
3486
    def __iter__(self):
        """Return self, so this object serves as its own iterator."""
        return self
3243
3488
 
 
3489
    def _gather_result_for_consistency(self, result):
 
3490
        """Check a result we will yield to make sure we are consistent later.
 
3491
        
 
3492
        This gathers result's parents into a set to output later.
 
3493
 
 
3494
        :param result: A result tuple.
 
3495
        """
 
3496
        if not self.partial or not result[0]:
 
3497
            return
 
3498
        self.seen_ids.add(result[0])
 
3499
        new_path = result[1][1]
 
3500
        if new_path:
 
3501
            # Not the root and not a delete: queue up the parents of the path.
 
3502
            self.search_specific_file_parents.update(
 
3503
                osutils.parent_directories(new_path.encode('utf8')))
 
3504
            # Add the root directory which parent_directories does not
 
3505
            # provide.
 
3506
            self.search_specific_file_parents.add('')
 
3507
 
3244
3508
    def iter_changes(self):
3245
3509
        """Iterate over the changes."""
3246
3510
        utf8_decode = cache_utf8._utf8_decode
3247
3511
        _cmp_by_dirs = cmp_by_dirs
3248
3512
        _process_entry = self._process_entry
3249
 
        uninteresting = self.uninteresting
3250
3513
        search_specific_files = self.search_specific_files
3251
3514
        searched_specific_files = self.searched_specific_files
3252
3515
        splitpath = osutils.splitpath
3322
3585
                continue
3323
3586
            path_handled = False
3324
3587
            for entry in root_entries:
3325
 
                result = _process_entry(entry, root_dir_info)
3326
 
                if result is not None:
 
3588
                result, changed = _process_entry(entry, root_dir_info)
 
3589
                if changed is not None:
3327
3590
                    path_handled = True
3328
 
                    if result is not uninteresting:
 
3591
                    if changed:
 
3592
                        self._gather_result_for_consistency(result)
 
3593
                    if changed or self.include_unchanged:
3329
3594
                        yield result
3330
3595
            if self.want_unversioned and not path_handled and root_dir_info:
3331
3596
                new_executable = bool(
3441
3706
                        for current_entry in current_block[1]:
3442
3707
                            # entry referring to file not present on disk.
3443
3708
                            # advance the entry only, after processing.
3444
 
                            result = _process_entry(current_entry, None)
3445
 
                            if result is not None:
3446
 
                                if result is not uninteresting:
 
3709
                            result, changed = _process_entry(current_entry, None)
 
3710
                            if changed is not None:
 
3711
                                if changed:
 
3712
                                    self._gather_result_for_consistency(result)
 
3713
                                if changed or self.include_unchanged:
3447
3714
                                    yield result
3448
3715
                        block_index +=1
3449
3716
                        if (block_index < len(self.state._dirblocks) and
3479
3746
                        pass
3480
3747
                    elif current_path_info is None:
3481
3748
                        # no path is fine: the per entry code will handle it.
3482
 
                        result = _process_entry(current_entry, current_path_info)
3483
 
                        if result is not None:
3484
 
                            if result is not uninteresting:
 
3749
                        result, changed = _process_entry(current_entry, current_path_info)
 
3750
                        if changed is not None:
 
3751
                            if changed:
 
3752
                                self._gather_result_for_consistency(result)
 
3753
                            if changed or self.include_unchanged:
3485
3754
                                yield result
3486
3755
                    elif (current_entry[0][1] != current_path_info[1]
3487
3756
                          or current_entry[1][self.target_index][0] in 'ar'):
3500
3769
                        else:
3501
3770
                            # entry referring to file not present on disk.
3502
3771
                            # advance the entry only, after processing.
3503
 
                            result = _process_entry(current_entry, None)
3504
 
                            if result is not None:
3505
 
                                if result is not uninteresting:
 
3772
                            result, changed = _process_entry(current_entry, None)
 
3773
                            if changed is not None:
 
3774
                                if changed:
 
3775
                                    self._gather_result_for_consistency(result)
 
3776
                                if changed or self.include_unchanged:
3506
3777
                                    yield result
3507
3778
                            advance_path = False
3508
3779
                    else:
3509
 
                        result = _process_entry(current_entry, current_path_info)
3510
 
                        if result is not None:
 
3780
                        result, changed = _process_entry(current_entry, current_path_info)
 
3781
                        if changed is not None:
3511
3782
                            path_handled = True
3512
 
                            if result is not uninteresting:
 
3783
                            if changed:
 
3784
                                self._gather_result_for_consistency(result)
 
3785
                            if changed or self.include_unchanged:
3513
3786
                                yield result
3514
3787
                    if advance_entry and current_entry is not None:
3515
3788
                        entry_index += 1
3574
3847
                        current_dir_info = dir_iterator.next()
3575
3848
                    except StopIteration:
3576
3849
                        current_dir_info = None
 
3850
        for result in self._iter_specific_file_parents():
 
3851
            yield result
 
3852
 
 
3853
    def _iter_specific_file_parents(self):
 
3854
        """Iter over the specific file parents."""
 
3855
        while self.search_specific_file_parents:
 
3856
            # Process the parent directories for the paths we were iterating.
 
3857
            # Even in extremely large trees this should be modest, so currently
 
3858
            # no attempt is made to optimise.
 
3859
            path_utf8 = self.search_specific_file_parents.pop()
 
3860
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
3861
                # We've examined this path.
 
3862
                continue
 
3863
            if path_utf8 in self.searched_exact_paths:
 
3864
                # We've examined this path.
 
3865
                continue
 
3866
            path_entries = self.state._entries_for_path(path_utf8)
 
3867
            # We need either one or two entries. If the path in
 
3868
            # self.target_index has moved (so the entry in source_index is in
 
3869
            # 'ar') then we need to also look for the entry for this path in
 
3870
            # self.source_index, to output the appropriate delete-or-rename.
 
3871
            selected_entries = []
 
3872
            found_item = False
 
3873
            for candidate_entry in path_entries:
 
3874
                # Find entries present in target at this path:
 
3875
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
3876
                    found_item = True
 
3877
                    selected_entries.append(candidate_entry)
 
3878
                # Find entries present in source at this path:
 
3879
                elif (self.source_index is not None and
 
3880
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
3881
                    found_item = True
 
3882
                    if candidate_entry[1][self.target_index][0] == 'a':
 
3883
                        # Deleted, emit it here.
 
3884
                        selected_entries.append(candidate_entry)
 
3885
                    else:
 
3886
                        # renamed, emit it when we process the directory it
 
3887
                        # ended up at.
 
3888
                        self.search_specific_file_parents.add(
 
3889
                            candidate_entry[1][self.target_index][1])
 
3890
            if not found_item:
 
3891
                raise AssertionError(
 
3892
                    "Missing entry for specific path parent %r, %r" % (
 
3893
                    path_utf8, path_entries))
 
3894
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
3895
            for entry in selected_entries:
 
3896
                if entry[0][2] in self.seen_ids:
 
3897
                    continue
 
3898
                result, changed = self._process_entry(entry, path_info)
 
3899
                if changed is None:
 
3900
                    raise AssertionError(
 
3901
                        "Got entry<->path mismatch for specific path "
 
3902
                        "%r entry %r path_info %r " % (
 
3903
                        path_utf8, entry, path_info))
 
3904
                # Only include changes - we're outside the users requested
 
3905
                # expansion.
 
3906
                if changed:
 
3907
                    self._gather_result_for_consistency(result)
 
3908
                    if (result[6][0] == 'directory' and
 
3909
                        result[6][1] != 'directory'):
 
3910
                        # This stopped being a directory, the old children have
 
3911
                        # to be included.
 
3912
                        if entry[1][self.source_index][0] == 'r':
 
3913
                            # renamed, take the source path
 
3914
                            entry_path_utf8 = entry[1][self.source_index][1]
 
3915
                        else:
 
3916
                            entry_path_utf8 = path_utf8
 
3917
                        initial_key = (entry_path_utf8, '', '')
 
3918
                        block_index, _ = self.state._find_block_index_from_key(
 
3919
                            initial_key)
 
3920
                        if block_index == 0:
 
3921
                            # The children of the root are in block index 1.
 
3922
                            block_index +=1
 
3923
                        current_block = None
 
3924
                        if block_index < len(self.state._dirblocks):
 
3925
                            current_block = self.state._dirblocks[block_index]
 
3926
                            if not osutils.is_inside(
 
3927
                                entry_path_utf8, current_block[0]):
 
3928
                                # No entries for this directory at all.
 
3929
                                current_block = None
 
3930
                        if current_block is not None:
 
3931
                            for entry in current_block[1]:
 
3932
                                if entry[1][self.source_index][0] in 'ar':
 
3933
                                    # Not in the source tree, so doesn't have to be
 
3934
                                    # included.
 
3935
                                    continue
 
3936
                                # Path of the entry itself.
 
3937
 
 
3938
                                self.search_specific_file_parents.add(
 
3939
                                    osutils.pathjoin(*entry[0][:2]))
 
3940
                if changed or self.include_unchanged:
 
3941
                    yield result
 
3942
            self.searched_exact_paths.add(path_utf8)
 
3943
 
 
3944
    def _path_info(self, utf8_path, unicode_path):
 
3945
        """Generate path_info for unicode_path.
 
3946
 
 
3947
        :return: None if unicode_path does not exist, or a path_info tuple.
 
3948
        """
 
3949
        abspath = self.tree.abspath(unicode_path)
 
3950
        try:
 
3951
            stat = os.lstat(abspath)
 
3952
        except OSError, e:
 
3953
            if e.errno == errno.ENOENT:
 
3954
                # the path does not exist.
 
3955
                return None
 
3956
            else:
 
3957
                raise
 
3958
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
3959
        dir_info = (utf8_path, utf8_basename,
 
3960
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
3961
            abspath)
 
3962
        if dir_info[2] == 'directory':
 
3963
            if self.tree._directory_is_tree_reference(
 
3964
                unicode_path):
 
3965
                self.root_dir_info = self.root_dir_info[:2] + \
 
3966
                    ('tree-reference',) + self.root_dir_info[3:]
 
3967
        return dir_info
3577
3968
 
3578
3969
 
3579
3970
# Try to load the compiled form if possible
3587
3978
        ProcessEntryC as _process_entry,
3588
3979
        update_entry as update_entry,
3589
3980
        )
3590
 
except ImportError:
 
3981
except ImportError, e:
 
3982
    osutils.failed_to_load_extension(e)
3591
3983
    from bzrlib._dirstate_helpers_py import (
3592
3984
        _read_dirblocks,
3593
3985
        bisect_dirblock,