264
263
# return '%X.%X' % (int(st.st_mtime), st.st_mode)
267
class SHA1Provider(object):
    """An interface for getting sha1s of a file."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path.

        :param abspath: May be a filesystem encoded absolute path
        :raises NotImplementedError: always; this base class only defines
            the interface and concrete providers must override the method.
        """
        raise NotImplementedError(self.sha1)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path.

        :param abspath: May be a filesystem encoded absolute path

        Note: the stat should be the stat of the physical file
        while the sha may be the sha of its canonical content.

        :raises NotImplementedError: always; this base class only defines
            the interface and concrete providers must override the method.
        """
        raise NotImplementedError(self.stat_and_sha1)
290
class DefaultSHA1Provider(SHA1Provider):
    """A SHA1Provider that reads directly from the filesystem."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path.

        Delegates to osutils.sha_file_by_name.
        """
        return osutils.sha_file_by_name(abspath)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path.

        The file is opened once; both the stat (via os.fstat on the open
        descriptor) and the sha1 are taken from that same handle.

        :param abspath: The path of the file to open in binary mode.
        :return: A (statvalue, sha1) tuple.
        """
        file_obj = open(abspath, 'rb')
        try:
            statvalue = os.fstat(file_obj.fileno())
            sha1 = osutils.sha_file(file_obj)
        finally:
            # Always release the descriptor, even if stat or hashing fails.
            file_obj.close()
        return statvalue, sha1
308
266
class DirState(object):
309
267
"""Record directory and metadata state for fast access.
461
422
raise AssertionError(
462
423
"must be a utf8 file_id not %s" % (type(file_id), ))
463
424
# Make sure the file_id does not exist in this tree
465
file_id_entry = self._get_entry(0, fileid_utf8=file_id, include_deleted=True)
425
file_id_entry = self._get_entry(0, fileid_utf8=file_id)
466
426
if file_id_entry != (None, None):
467
if file_id_entry[1][0][0] == 'a':
468
if file_id_entry[0] != (dirname, basename, file_id):
469
# set the old name's current operation to rename
470
self.update_minimal(file_id_entry[0],
476
rename_from = file_id_entry[0][0:2]
478
path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])
479
kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]
480
info = '%s:%s' % (kind, path)
481
raise errors.DuplicateFileId(file_id, info)
427
path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])
428
kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]
429
info = '%s:%s' % (kind, path)
430
raise errors.DuplicateFileId(file_id, info)
482
431
first_key = (dirname, basename, '')
483
432
block_index, present = self._find_block_index_from_key(first_key)
485
434
# check the path is not in the tree
486
435
block = self._dirblocks[block_index][1]
487
436
entry_index, _ = self._find_entry_index(first_key, block)
488
while (entry_index < len(block) and
437
while (entry_index < len(block) and
489
438
block[entry_index][0][0:2] == first_key[0:2]):
490
439
if block[entry_index][1][0][0] not in 'ar':
491
440
# this path is in the dirstate in the current tree.
1278
1218
def update_by_delta(self, delta):
1279
1219
"""Apply an inventory delta to the dirstate for tree 0
1281
This is the workhorse for apply_inventory_delta in dirstate based
1284
1221
:param delta: An inventory delta. See Inventory.apply_delta for
1287
1224
self._read_dirblocks_if_needed()
1288
encode = cache_utf8.encode
1289
1225
insertions = {}
1291
# Accumulate parent references (path_utf8, id), to check for parentless
1292
# items or items placed under files/links/tree-references. We get
1293
# references from every item in the delta that is not a deletion and
1294
# is not itself the root.
1296
# Added ids must not be in the dirstate already. This set holds those
1299
# This loop transforms the delta to single atomic operations that can
1300
# be executed and validated.
1301
for old_path, new_path, file_id, inv_entry in sorted(
1302
inventory._check_delta_unique_old_paths(
1303
inventory._check_delta_unique_new_paths(
1304
inventory._check_delta_ids_match_entry(
1305
inventory._check_delta_ids_are_valid(
1306
inventory._check_delta_new_path_entry_both_or_None(delta))))),
1227
for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
1308
1228
if (file_id in insertions) or (file_id in removals):
1309
raise errors.InconsistentDelta(old_path or new_path, file_id,
1229
raise AssertionError("repeated file id in delta %r" % (file_id,))
1311
1230
if old_path is not None:
1312
1231
old_path = old_path.encode('utf-8')
1313
1232
removals[file_id] = old_path
1315
new_ids.add(file_id)
1316
1233
if new_path is not None:
1317
if inv_entry is None:
1318
raise errors.InconsistentDelta(new_path, file_id,
1319
"new_path with no entry")
1320
1234
new_path = new_path.encode('utf-8')
1321
dirname_utf8, basename = osutils.split(new_path)
1323
parents.add((dirname_utf8, inv_entry.parent_id))
1324
key = (dirname_utf8, basename, file_id)
1235
dirname, basename = osutils.split(new_path)
1236
key = (dirname, basename, file_id)
1325
1237
minikind = DirState._kind_to_minikind[inv_entry.kind]
1326
1238
if minikind == 't':
1327
fingerprint = inv_entry.reference_revision or ''
1239
fingerprint = inv_entry.reference_revision
1329
1241
fingerprint = ''
1330
1242
insertions[file_id] = (key, minikind, inv_entry.executable,
1339
1251
minikind = child[1][0][0]
1340
1252
fingerprint = child[1][0][4]
1341
1253
executable = child[1][0][3]
1342
old_child_path = osutils.pathjoin(child_dirname,
1254
old_child_path = osutils.pathjoin(child[0][0],
1344
1256
removals[child[0][2]] = old_child_path
1345
1257
child_suffix = child_dirname[len(old_path):]
1346
1258
new_child_dirname = (new_path + child_suffix)
1347
1259
key = (new_child_dirname, child_basename, child[0][2])
1348
new_child_path = osutils.pathjoin(new_child_dirname,
1260
new_child_path = os.path.join(new_child_dirname,
1350
1262
insertions[child[0][2]] = (key, minikind, executable,
1351
1263
fingerprint, new_child_path)
1352
self._check_delta_ids_absent(new_ids, delta, 0)
1354
self._apply_removals(removals.iteritems())
1355
self._apply_insertions(insertions.values())
1357
self._after_delta_check_parents(parents, 0)
1358
except errors.BzrError, e:
1359
self._changes_aborted = True
1360
if 'integrity error' not in str(e):
1362
# _get_entry raises BzrError when a request is inconsistent; we
1363
# want such errors to be shown as InconsistentDelta - and that
1364
# fits the behaviour we trigger.
1365
raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1264
self._apply_removals(removals.values())
1265
self._apply_insertions(insertions.values())
1367
1267
def _apply_removals(self, removals):
1368
for file_id, path in sorted(removals, reverse=True,
1369
key=operator.itemgetter(1)):
1268
for path in sorted(removals, reverse=True):
1370
1269
dirname, basename = osutils.split(path)
1371
1270
block_i, entry_i, d_present, f_present = \
1372
1271
self._get_block_entry_index(dirname, basename, 0)
1374
entry = self._dirblocks[block_i][1][entry_i]
1376
self._changes_aborted = True
1377
raise errors.InconsistentDelta(path, file_id,
1378
"Wrong path for old path.")
1379
if not f_present or entry[1][0][0] in 'ar':
1380
self._changes_aborted = True
1381
raise errors.InconsistentDelta(path, file_id,
1382
"Wrong path for old path.")
1383
if file_id != entry[0][2]:
1384
self._changes_aborted = True
1385
raise errors.InconsistentDelta(path, file_id,
1386
"Attempt to remove path has wrong id - found %r."
1272
entry = self._dirblocks[block_i][1][entry_i]
1388
1273
self._make_absent(entry)
1389
1274
# See if we have a malformed delta: deleting a directory must not
1390
1275
# leave crud behind. This increases the number of bisects needed
1398
1283
# be due to it being in a parent tree, or a corrupt delta.
1399
1284
for child_entry in self._dirblocks[block_i][1]:
1400
1285
if child_entry[1][0][0] not in ('r', 'a'):
1401
self._changes_aborted = True
1402
1286
raise errors.InconsistentDelta(path, entry[0][2],
1403
1287
"The file id was deleted but its children were "
1404
1288
"not deleted.")
1406
1290
def _apply_insertions(self, adds):
1408
for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1409
self.update_minimal(key, minikind, executable, fingerprint,
1410
path_utf8=path_utf8)
1411
except errors.NotVersionedError:
1412
self._changes_aborted = True
1413
raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
1291
for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1292
self.update_minimal(key, minikind, executable, fingerprint,
1293
path_utf8=path_utf8)
1416
1295
def update_basis_by_delta(self, delta, new_revid):
1417
1296
"""Update the parents of this tree after a commit.
1461
1340
# At the same time, to reduce interface friction we convert the input
1462
1341
# inventory entries to dirstate.
1463
1342
root_only = ('', '')
1464
# Accumulate parent references (path_utf8, id), to check for parentless
1465
# items or items placed under files/links/tree-references. We get
1466
# references from every item in the delta that is not a deletion and
1467
# is not itself the root.
1469
# Added ids must not be in the dirstate already. This set holds those
1472
1343
for old_path, new_path, file_id, inv_entry in delta:
1473
if inv_entry is not None and file_id != inv_entry.file_id:
1474
raise errors.InconsistentDelta(new_path, file_id,
1475
"mismatched entry file_id %r" % inv_entry)
1476
if new_path is not None:
1477
if inv_entry is None:
1478
raise errors.InconsistentDelta(new_path, file_id,
1479
"new_path with no entry")
1480
new_path_utf8 = encode(new_path)
1481
# note the parent for validation
1482
dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1484
parents.add((dirname_utf8, inv_entry.parent_id))
1485
1344
if old_path is None:
1486
1345
adds.append((None, encode(new_path), file_id,
1487
1346
inv_to_entry(inv_entry), True))
1488
new_ids.add(file_id)
1489
1347
elif new_path is None:
1490
1348
deletes.append((encode(old_path), None, file_id, None, True))
1491
1349
elif (old_path, new_path) != root_only:
1534
1393
# of everything.
1535
1394
changes.append((encode(old_path), encode(new_path), file_id,
1536
1395
inv_to_entry(inv_entry)))
1537
self._check_delta_ids_absent(new_ids, delta, 1)
1539
# Finish expunging deletes/first half of renames.
1540
self._update_basis_apply_deletes(deletes)
1541
# Reinstate second half of renames and new paths.
1542
self._update_basis_apply_adds(adds)
1543
# Apply in-situ changes.
1544
self._update_basis_apply_changes(changes)
1546
self._after_delta_check_parents(parents, 1)
1547
except errors.BzrError, e:
1548
self._changes_aborted = True
1549
if 'integrity error' not in str(e):
1551
# _get_entry raises BzrError when a request is inconsistent; we
1552
# want such errors to be shown as InconsistentDelta - and that
1553
# fits the behaviour we trigger. Part of this is driven by dirstate
1554
# only supporting deltas that turn the basis into a closer fit to
1556
raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1397
# Finish expunging deletes/first half of renames.
1398
self._update_basis_apply_deletes(deletes)
1399
# Reinstate second half of renames and new paths.
1400
self._update_basis_apply_adds(adds)
1401
# Apply in-situ changes.
1402
self._update_basis_apply_changes(changes)
1558
1404
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1559
1405
self._header_state = DirState.IN_MEMORY_MODIFIED
1560
1406
self._id_index = None
1563
def _check_delta_ids_absent(self, new_ids, delta, tree_index):
1564
"""Check that none of the file_ids in new_ids are present in a tree."""
1567
id_index = self._get_id_index()
1568
for file_id in new_ids:
1569
for key in id_index.get(file_id, []):
1570
block_i, entry_i, d_present, f_present = \
1571
self._get_block_entry_index(key[0], key[1], tree_index)
1573
# In a different tree
1575
entry = self._dirblocks[block_i][1][entry_i]
1576
if entry[0][2] != file_id:
1577
# Different file_id, so not what we want.
1579
# NB: No changes made before this helper is called, so no need
1580
# to set the _changes_aborted flag.
1581
raise errors.InconsistentDelta(
1582
("%s/%s" % key[0:2]).decode('utf8'), file_id,
1583
"This file_id is new in the delta but already present in "
1586
1409
def _update_basis_apply_adds(self, adds):
1587
1410
"""Apply a sequence of adds to tree 1 during update_basis_by_delta.
1692
1514
# it is being resurrected here, so blank it out temporarily.
1693
1515
self._dirblocks[block_index][1][entry_index][1][1] = null
1695
def _after_delta_check_parents(self, parents, index):
1696
"""Check that parents required by the delta are all intact.
1698
:param parents: An iterable of (path_utf8, file_id) tuples which are
1699
required to be present in tree 'index' at path_utf8 with id file_id
1701
:param index: The column in the dirstate to check for parents in.
1703
for dirname_utf8, file_id in parents:
1704
# Get the entry - this ensures that file_id, dirname_utf8 exists and
1705
# has the right file id.
1706
entry = self._get_entry(index, file_id, dirname_utf8)
1707
if entry[1] is None:
1708
self._changes_aborted = True
1709
raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
1710
file_id, "This parent is not present.")
1711
# Parents of things must be directories
1712
if entry[1][index][0] != 'd':
1713
self._changes_aborted = True
1714
raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
1715
file_id, "This parent is not a directory.")
1717
1517
def _observed_sha1(self, entry, sha1, stat_value,
1718
1518
_stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1719
1519
"""Note the sha1 of a file.
2177
1953
return len(self._parents) - len(self._ghosts)
2180
def on_file(path, sha1_provider=None):
2181
"""Construct a DirState on the file at path "path".
1956
def on_file(path, content_filter_stack_provider=None):
1957
"""Construct a DirState on the file at path path.
2183
:param path: The path at which the dirstate file on disk should live.
2184
:param sha1_provider: an object meeting the SHA1Provider interface.
2185
If None, a DefaultSHA1Provider is used.
1959
:param content_filter_stack_provider: a function that takes a
1960
path (relative to the top of the tree) and a file-id as
1961
parameters and returns a stack of ContentFilters.
1962
If None, no content filtering is performed.
2186
1963
:return: An unlocked DirState object, associated with the given path.
2188
if sha1_provider is None:
2189
sha1_provider = DefaultSHA1Provider()
2190
result = DirState(path, sha1_provider)
1965
result = DirState(path, content_filter_stack_provider)
2193
1968
def _read_dirblocks_if_needed(self):
2194
1969
"""Read in all the dirblocks from the file if they are not in memory.
2196
1971
This populates self._dirblocks, and sets self._dirblock_state to
2197
1972
IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block
2356
2131
self.update_minimal(('', '', new_id), 'd',
2357
2132
path_utf8='', packed_stat=entry[1][0][4])
2358
2133
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2134
if self._id_index is not None:
2135
self._id_index.setdefault(new_id, set()).add(entry[0])
2360
2137
def set_parent_trees(self, trees, ghosts):
2361
2138
"""Set the parent trees for the dirstate.
2363
2140
:param trees: A list of revision_id, tree tuples. tree must be provided
2364
even if the revision_id refers to a ghost: supply an empty tree in
2141
even if the revision_id refers to a ghost: supply an empty tree in
2366
2143
:param ghosts: A list of the revision_ids that are ghosts at the time
2369
# TODO: generate a list of parent indexes to preserve to save
2146
# TODO: generate a list of parent indexes to preserve to save
2370
2147
# processing specific parent trees. In the common case one tree will
2371
2148
# be preserved - the left most parent.
2372
2149
# TODO: if the parent tree is a dirstate, we might want to walk them
2620
2380
and new_entry_key[1:] < current_old[0][1:])):
2621
2381
# new comes before:
2622
2382
# add an entry for this and advance new
2624
trace.mutter("Inserting from new '%s'.",
2625
new_path_utf8.decode('utf8'))
2626
2383
self.update_minimal(new_entry_key, current_new_minikind,
2627
2384
executable=current_new[1].executable,
2628
path_utf8=new_path_utf8, fingerprint=fingerprint,
2385
path_utf8=new_path_utf8, fingerprint=fingerprint)
2630
2386
current_new = advance(new_iterator)
2632
2388
# we've advanced past the place where the old key would be,
2633
2389
# without seeing it in the new list. so it must be gone.
2635
trace.mutter("Deleting from old '%s/%s'.",
2636
current_old[0][0].decode('utf8'),
2637
current_old[0][1].decode('utf8'))
2638
2390
self._make_absent(current_old)
2639
2391
current_old = advance(old_iterator)
2640
2392
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2641
2393
self._id_index = None
2642
2394
self._packed_stat_index = None
2644
trace.mutter("set_state_from_inventory complete.")
2646
2396
def _make_absent(self, current_old):
2647
2397
"""Mark current_old - an entry - as absent for tree 0.
2757
2487
# grab one of them and use it to generate parent
2758
2488
# relocation/absent entries.
2759
2489
new_entry = key, [new_details]
2760
# existing_keys can be changed as we iterate.
2761
for other_key in tuple(existing_keys):
2490
for other_key in existing_keys:
2762
2491
# change the record at other to be a pointer to this new
2763
2492
# record. The loop looks similar to the change to
2764
2493
# relocations when updating an existing record but its not:
2765
2494
# the test for existing kinds is different: this can be
2766
2495
# factored out to a helper though.
2767
other_block_index, present = self._find_block_index_from_key(
2770
raise AssertionError('could not find block for %s' % (
2772
other_block = self._dirblocks[other_block_index][1]
2773
other_entry_index, present = self._find_entry_index(
2774
other_key, other_block)
2776
raise AssertionError(
2777
'update_minimal: could not find other entry for %s'
2496
other_block_index, present = self._find_block_index_from_key(other_key)
2498
raise AssertionError('could not find block for %s' % (other_key,))
2499
other_entry_index, present = self._find_entry_index(other_key,
2500
self._dirblocks[other_block_index][1])
2502
raise AssertionError('could not find entry for %s' % (other_key,))
2779
2503
if path_utf8 is None:
2780
2504
raise AssertionError('no path')
2781
# Turn this other location into a reference to the new
2782
# location. This also updates the aliased iterator
2783
# (current_old in set_state_from_inventory) so that the old
2784
# entry, if not already examined, is skipped over by that
2786
other_entry = other_block[other_entry_index]
2787
other_entry[1][0] = ('r', path_utf8, 0, False, '')
2788
self._maybe_remove_row(other_block, other_entry_index,
2505
self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2506
('r', path_utf8, 0, False, '')
2792
# adds a tuple to the new details for each column
2793
# - either by copying an existing relocation pointer inside that column
2794
# - or by creating a new pointer to the right row inside that column
2795
2508
num_present_parents = self._num_present_parents()
2796
if num_present_parents:
2797
other_key = list(existing_keys)[0]
2798
2509
for lookup_index in xrange(1, num_present_parents + 1):
2799
2510
# grab any one entry, use it to find the right path.
2800
# TODO: optimise this to reduce memory use in highly
2511
# TODO: optimise this to reduce memory use in highly
2801
2512
# fragmented situations by reusing the relocation
2803
2514
update_block_index, present = \
3131
2818
and stat_value.st_ctime < state._cutoff_time
3132
2819
and len(entry[1]) > 1
3133
2820
and entry[1][1][0] != 'a'):
3134
# Could check for size changes for further optimised
3135
# avoidance of sha1's. However the most prominent case of
3136
# over-shaing is during initial add, which this catches.
3137
# Besides, if content filtering happens, size and sha
3138
# are calculated at the same time, so checking just the size
3139
# gains nothing w.r.t. performance.
3140
link_or_sha1 = state._sha1_file(abspath)
2821
# Could check for size changes for further optimised
2822
# avoidance of sha1's. However the most prominent case of
2823
# over-shaing is during initial add, which this catches.
2824
# Besides, if content filtering happens, size and sha
2825
# need to be checked together - checking just the size
2827
if state._filter_provider is None:
2830
relpath = osutils.pathjoin(entry[0][0], entry[0][1])
2831
file_id = entry[0][2]
2832
filter_list = state._filter_provider(relpath, file_id)
2833
link_or_sha1 = state._size_sha1_file(abspath, filter_list)[1]
3141
2834
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
3142
2835
executable, packed_stat)
3167
2860
False, DirState.NULLSTAT)
3168
2861
state._dirblock_state = DirState.IN_MEMORY_MODIFIED
3169
2862
return link_or_sha1
2863
update_entry = py_update_entry
3172
2866
class ProcessEntryPython(object):
3174
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2868
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
3175
2869
"last_source_parent", "last_target_parent", "include_unchanged",
3176
"partial", "use_filesystem_for_exec", "utf8_decode",
3177
"searched_specific_files", "search_specific_files",
3178
"searched_exact_paths", "search_specific_file_parents", "seen_ids",
3179
"state", "source_index", "target_index", "want_unversioned", "tree"]
2870
"use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2871
"search_specific_files", "state", "source_index", "target_index",
2872
"want_unversioned", "tree"]
3181
2874
def __init__(self, include_unchanged, use_filesystem_for_exec,
3182
2875
search_specific_files, state, source_index, target_index,
3183
2876
want_unversioned, tree):
3184
2877
self.old_dirname_to_file_id = {}
3185
2878
self.new_dirname_to_file_id = {}
3186
# Are we doing a partial iter_changes?
3187
self.partial = search_specific_files != set([''])
2879
# Just a sentry, so that _process_entry can say that this
2880
# record is handled, but isn't interesting to process (unchanged)
2881
self.uninteresting = object()
3188
2882
# Using a list so that we can access the values and change them in
3189
2883
# nested scope. Each one is [path, file_id, entry]
3190
2884
self.last_source_parent = [None, None]
3193
2887
self.use_filesystem_for_exec = use_filesystem_for_exec
3194
2888
self.utf8_decode = cache_utf8._utf8_decode
3195
2889
# for all search_indexs in each path at or under each element of
3196
# search_specific_files, if the detail is relocated: add the id, and
3197
# add the relocated path as one to search if its not searched already.
3198
# If the detail is not relocated, add the id.
2890
# search_specific_files, if the detail is relocated: add the id, and add the
2891
# relocated path as one to search if its not searched already. If the
2892
# detail is not relocated, add the id.
3199
2893
self.searched_specific_files = set()
3200
# When we search exact paths without expanding downwards, we record
3202
self.searched_exact_paths = set()
3203
2894
self.search_specific_files = search_specific_files
3204
# The parents up to the root of the paths we are searching.
3205
# After all normal paths are returned, these specific items are returned.
3206
self.search_specific_file_parents = set()
3207
# The ids we've sent out in the delta.
3208
self.seen_ids = set()
3209
2895
self.state = state
3210
2896
self.source_index = source_index
3211
2897
self.target_index = target_index
3212
if target_index != 0:
3213
# A lot of code in here depends on target_index == 0
3214
raise errors.BzrError('unsupported target index')
3215
2898
self.want_unversioned = want_unversioned
3216
2899
self.tree = tree
3219
2902
"""Compare an entry and real disk to generate delta information.
3221
2904
:param path_info: top_relpath, basename, kind, lstat, abspath for
3222
the path of entry. If None, then the path is considered absent in
3223
the target (Perhaps we should pass in a concrete entry for this ?)
2905
the path of entry. If None, then the path is considered absent.
2906
(Perhaps we should pass in a concrete entry for this ?)
3224
2907
Basename is returned as a utf8 string because we expect this
3225
2908
tuple will be ignored, and don't want to take the time to
3227
:return: (iter_changes_result, changed). If the entry has not been
3228
handled then changed is None. Otherwise it is False if no content
3229
or metadata changes have occurred, and True if any content or
3230
metadata change has occurred. If self.include_unchanged is True then
3231
if changed is not None, iter_changes_result will always be a result
3232
tuple. Otherwise, iter_changes_result is None unless changed is
2910
:return: None if these don't match
2911
A tuple of information about the change, or
2912
the object 'uninteresting' if these match, but are
2913
basically identical.
3235
2915
if self.source_index is None:
3236
2916
source_details = DirState.NULL_PARENT_DETAILS
3304
2984
if source_minikind != 'f':
3305
2985
content_change = True
3307
# Check the sha. We can't just rely on the size as
3308
# content filtering may mean different sizes actually
3309
# map to the same content
3310
if link_or_sha1 is None:
3312
statvalue, link_or_sha1 = \
3313
self.state._sha1_provider.stat_and_sha1(
3315
self.state._observed_sha1(entry, link_or_sha1,
3317
content_change = (link_or_sha1 != source_details[1])
2987
# If the size is the same, check the sha:
2988
if target_details[2] == source_details[2]:
2989
if link_or_sha1 is None:
2991
file_obj = file(path_info[4], 'rb')
2993
statvalue = os.fstat(file_obj.fileno())
2994
link_or_sha1 = osutils.sha_file(file_obj)
2997
self.state._observed_sha1(entry, link_or_sha1,
2999
content_change = (link_or_sha1 != source_details[1])
3001
# Size changed, so must be different
3002
content_change = True
3318
3003
# Target details is updated at update_entry time
3319
3004
if self.use_filesystem_for_exec:
3320
3005
# We don't need S_ISREG here, because we are sure
3481
3164
"source_minikind=%r, target_minikind=%r"
3482
3165
% (source_minikind, target_minikind))
3483
3166
## import pdb;pdb.set_trace()
3486
3169
def __iter__(self):
3489
def _gather_result_for_consistency(self, result):
3490
"""Check a result we will yield to make sure we are consistent later.
3492
This gathers result's parents into a set to output later.
3494
:param result: A result tuple.
3496
if not self.partial or not result[0]:
3498
self.seen_ids.add(result[0])
3499
new_path = result[1][1]
3501
# Not the root and not a delete: queue up the parents of the path.
3502
self.search_specific_file_parents.update(
3503
osutils.parent_directories(new_path.encode('utf8')))
3504
# Add the root directory which parent_directories does not
3506
self.search_specific_file_parents.add('')
3508
3172
def iter_changes(self):
3509
3173
"""Iterate over the changes."""
3510
3174
utf8_decode = cache_utf8._utf8_decode
3511
3175
_cmp_by_dirs = cmp_by_dirs
3512
3176
_process_entry = self._process_entry
3177
uninteresting = self.uninteresting
3513
3178
search_specific_files = self.search_specific_files
3514
3179
searched_specific_files = self.searched_specific_files
3515
3180
splitpath = osutils.splitpath
3517
3182
# compare source_index and target_index at or under each element of search_specific_files.
3518
3183
# follow the following comparison table. Note that we only want to do diff operations when
3519
# the target is fdl because thats when the walkdirs logic will have exposed the pathinfo
3184
# the target is fdl because thats when the walkdirs logic will have exposed the pathinfo
3520
3185
# for the target.
3523
3188
# Source | Target | disk | action
3524
3189
# r | fdlt | | add source to search, add id path move and perform
3525
3190
# | | | diff check on source-target
3526
# r | fdlt | a | dangling file that was present in the basis.
3191
# r | fdlt | a | dangling file that was present in the basis.
3528
3193
# r | a | | add source to search
3530
3195
# r | r | | this path is present in a non-examined tree, skip.
3531
3196
# r | r | a | this path is present in a non-examined tree, skip.
3532
3197
# a | fdlt | | add new id
3847
3502
current_dir_info = dir_iterator.next()
3848
3503
except StopIteration:
3849
3504
current_dir_info = None
3850
for result in self._iter_specific_file_parents():
3853
def _iter_specific_file_parents(self):
3854
"""Iter over the specific file parents."""
3855
while self.search_specific_file_parents:
3856
# Process the parent directories for the paths we were iterating.
3857
# Even in extremely large trees this should be modest, so currently
3858
# no attempt is made to optimise.
3859
path_utf8 = self.search_specific_file_parents.pop()
3860
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
3861
# We've examined this path.
3863
if path_utf8 in self.searched_exact_paths:
3864
# We've examined this path.
3866
path_entries = self.state._entries_for_path(path_utf8)
3867
# We need either one or two entries. If the path in
3868
# self.target_index has moved (so the entry in source_index is in
3869
# 'ar') then we need to also look for the entry for this path in
3870
# self.source_index, to output the appropriate delete-or-rename.
3871
selected_entries = []
3873
for candidate_entry in path_entries:
3874
# Find entries present in target at this path:
3875
if candidate_entry[1][self.target_index][0] not in 'ar':
3877
selected_entries.append(candidate_entry)
3878
# Find entries present in source at this path:
3879
elif (self.source_index is not None and
3880
candidate_entry[1][self.source_index][0] not in 'ar'):
3882
if candidate_entry[1][self.target_index][0] == 'a':
3883
# Deleted, emit it here.
3884
selected_entries.append(candidate_entry)
3886
# renamed, emit it when we process the directory it
3888
self.search_specific_file_parents.add(
3889
candidate_entry[1][self.target_index][1])
3891
raise AssertionError(
3892
"Missing entry for specific path parent %r, %r" % (
3893
path_utf8, path_entries))
3894
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
3895
for entry in selected_entries:
3896
if entry[0][2] in self.seen_ids:
3898
result, changed = self._process_entry(entry, path_info)
3900
raise AssertionError(
3901
"Got entry<->path mismatch for specific path "
3902
"%r entry %r path_info %r " % (
3903
path_utf8, entry, path_info))
3904
# Only include changes - we're outside the users requested
3907
self._gather_result_for_consistency(result)
3908
if (result[6][0] == 'directory' and
3909
result[6][1] != 'directory'):
3910
# This stopped being a directory, the old children have
3912
if entry[1][self.source_index][0] == 'r':
3913
# renamed, take the source path
3914
entry_path_utf8 = entry[1][self.source_index][1]
3916
entry_path_utf8 = path_utf8
3917
initial_key = (entry_path_utf8, '', '')
3918
block_index, _ = self.state._find_block_index_from_key(
3920
if block_index == 0:
3921
# The children of the root are in block index 1.
3923
current_block = None
3924
if block_index < len(self.state._dirblocks):
3925
current_block = self.state._dirblocks[block_index]
3926
if not osutils.is_inside(
3927
entry_path_utf8, current_block[0]):
3928
# No entries for this directory at all.
3929
current_block = None
3930
if current_block is not None:
3931
for entry in current_block[1]:
3932
if entry[1][self.source_index][0] in 'ar':
3933
# Not in the source tree, so doesn't have to be
3936
# Path of the entry itself.
3938
self.search_specific_file_parents.add(
3939
osutils.pathjoin(*entry[0][:2]))
3940
if changed or self.include_unchanged:
3942
self.searched_exact_paths.add(path_utf8)
3944
def _path_info(self, utf8_path, unicode_path):
3945
"""Generate path_info for unicode_path.
3947
:return: None if unicode_path does not exist, or a path_info tuple.
3949
abspath = self.tree.abspath(unicode_path)
3951
stat = os.lstat(abspath)
3953
if e.errno == errno.ENOENT:
3954
# the path does not exist.
3958
utf8_basename = utf8_path.rsplit('/', 1)[-1]
3959
dir_info = (utf8_path, utf8_basename,
3960
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
3962
if dir_info[2] == 'directory':
3963
if self.tree._directory_is_tree_reference(
3965
self.root_dir_info = self.root_dir_info[:2] + \
3966
('tree-reference',) + self.root_dir_info[3:]
3505
_process_entry = ProcessEntryPython
3970
3508
# Try to load the compiled form if possible
3972
from bzrlib._dirstate_helpers_pyx import (
3510
from bzrlib._dirstate_helpers_c import (
3511
_read_dirblocks_c as _read_dirblocks,
3512
bisect_dirblock_c as bisect_dirblock,
3513
_bisect_path_left_c as _bisect_path_left,
3514
_bisect_path_right_c as _bisect_path_right,
3515
cmp_by_dirs_c as cmp_by_dirs,
3978
3516
ProcessEntryC as _process_entry,
3979
3517
update_entry as update_entry,
3981
except ImportError, e:
3982
osutils.failed_to_load_extension(e)
3983
3520
from bzrlib._dirstate_helpers_py import (
3521
_read_dirblocks_py as _read_dirblocks,
3522
bisect_dirblock_py as bisect_dirblock,
3523
_bisect_path_left_py as _bisect_path_left,
3524
_bisect_path_right_py as _bisect_path_right,
3525
cmp_by_dirs_py as cmp_by_dirs,
3990
# FIXME: It would be nice to be able to track moved lines so that the
3991
# corresponding python code can be moved to the _dirstate_helpers_py
3992
# module. I don't want to break the history for this important piece of
3993
# code so I left the code here -- vila 20090622
3994
update_entry = py_update_entry
3995
_process_entry = ProcessEntryPython