264
265
# return '%X.%X' % (int(st.st_mtime), st.st_mode)
268
def _unpack_stat(packed_stat):
269
"""Turn a packed_stat back into the stat fields.
271
This is meant as a debugging tool, should not be used in real code.
273
(st_size, st_mtime, st_ctime, st_dev, st_ino,
274
st_mode) = struct.unpack('>LLLLLL', binascii.a2b_base64(packed_stat))
275
return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
276
st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
267
279
class SHA1Provider(object):
268
280
"""An interface for getting sha1s of a file."""
363
375
HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
364
376
HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
366
def __init__(self, path, sha1_provider):
378
def __init__(self, path, sha1_provider, worth_saving_limit=0):
367
379
"""Create a DirState object.
369
381
:param path: The path at which the dirstate file on disk should live.
370
382
:param sha1_provider: an object meeting the SHA1Provider interface.
383
:param worth_saving_limit: when the exact number of hash changed
384
entries is known, only bother saving the dirstate if more than
385
this count of entries have changed. -1 means never save.
372
387
# _header_state and _dirblock_state represent the current state
373
388
# of the dirstate metadata and the per-row data respectively.
411
426
self._last_block_index = None
412
427
self._last_entry_index = None
428
# The set of known hash changes
429
self._known_hash_changes = set()
430
# How many hash changed entries can we have without saving
431
self._worth_saving_limit = worth_saving_limit
432
# If True, consider the worth saving limit when deciding whether to
433
# save the dirstate or not. If False, ignore it. If None, it can be
434
# set True but isn't True yet.
435
self._use_smart_saving = None
414
437
def __repr__(self):
415
438
return "%s(%r)" % \
416
439
(self.__class__.__name__, self._filename)
441
def _mark_modified(self, hash_changed_entries=None, header_too=False):
442
"""Mark this dirstate as modified.
444
:param hash_changed_entries: if non-None, mark just these entries as
445
having their hash modified.
446
:param header_too: mark the header modified as well, not just the
449
#trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
450
if hash_changed_entries:
451
self._known_hash_changes.update([e[0] for e in hash_changed_entries])
452
# We only enable smart saving if it hasn't already been disabled
453
if self._use_smart_saving is None:
454
self._use_smart_saving = True
456
# We don't know exactly what changed so disable smart saving
457
self._use_smart_saving = False
458
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
460
self._header_state = DirState.IN_MEMORY_MODIFIED
462
def _mark_unmodified(self):
    """Mark this dirstate as unmodified (in sync with what is on disk).

    Resets both the header and dirblock in-memory states, clears the set
    of entries with pending hash changes, and resets the smart-saving
    flag to None so a later _mark_modified call may enable it again.
    """
    self._header_state = DirState.IN_MEMORY_UNMODIFIED
    self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
    self._use_smart_saving = None
    self._known_hash_changes = set()
418
469
def add(self, path, file_id, kind, stat, fingerprint):
419
470
"""Add a path to be tracked.
546
597
if kind == 'directory':
547
598
# insert a new dirblock
548
599
self._ensure_block(block_index, entry_index, utf8path)
549
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
600
self._mark_modified()
550
601
if self._id_index:
551
self._id_index.setdefault(entry_key[2], set()).add(entry_key)
602
self._add_to_id_index(self._id_index, entry_key)
553
604
def _bisect(self, paths):
554
605
"""Bisect through the disk structure for specific rows.
1019
1070
self._ghosts = []
1020
1071
self._parents = [parents[0]]
1021
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1022
self._header_state = DirState.IN_MEMORY_MODIFIED
1072
self._mark_modified(header_too=True)
1024
1074
def _empty_parent_info(self):
1025
1075
return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
1567
1616
id_index = self._get_id_index()
1568
1617
for file_id in new_ids:
1569
for key in id_index.get(file_id, []):
1618
for key in id_index.get(file_id, ()):
1570
1619
block_i, entry_i, d_present, f_present = \
1571
1620
self._get_block_entry_index(key[0], key[1], tree_index)
1572
1621
if not f_present:
1733
1782
self._sha_cutoff_time()
1734
1783
if (stat_value.st_mtime < self._cutoff_time
1735
1784
and stat_value.st_ctime < self._cutoff_time):
1736
entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
1738
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1785
entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
1787
self._mark_modified()
1740
1789
def _sha_cutoff_time(self):
1741
1790
"""Return cutoff time.
1799
1848
"""Serialise the entire dirstate to a sequence of lines."""
1800
1849
if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
1801
1850
self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
1802
# read whats on disk.
1851
# read what's on disk.
1803
1852
self._state_file.seek(0)
1804
1853
return self._state_file.readlines()
1806
1855
lines.append(self._get_parents_line(self.get_parent_ids()))
1807
1856
lines.append(self._get_ghosts_line(self._ghosts))
1808
# append the root line which is special cased
1809
lines.extend(map(self._entry_to_line, self._iter_entries()))
1857
lines.extend(self._get_entry_lines())
1810
1858
return self._get_output_lines(lines)
1812
1860
def _get_ghosts_line(self, ghost_ids):
1817
1865
"""Create a line for the state file for parents information."""
1818
1866
return '\0'.join([str(len(parent_ids))] + parent_ids)
1868
def _get_entry_lines(self):
1869
"""Create lines for entries."""
1870
return map(self._entry_to_line, self._iter_entries())
1820
1872
def _get_fields_to_entry(self):
1821
1873
"""Get a function which converts entry fields into a entry record.
1980
2032
' tree_index, file_id and path')
1983
possible_keys = self._get_id_index().get(fileid_utf8, None)
2035
possible_keys = self._get_id_index().get(fileid_utf8, ())
1984
2036
if not possible_keys:
1985
2037
return None, None
1986
2038
for key in possible_keys:
2145
2197
def _get_id_index(self):
2146
"""Get an id index of self._dirblocks."""
2198
"""Get an id index of self._dirblocks.
2200
This maps from file_id => [(directory, name, file_id)] entries where
2201
that file_id appears in one of the trees.
2147
2203
if self._id_index is None:
2149
2205
for key, tree_details in self._iter_entries():
2150
id_index.setdefault(key[2], set()).add(key)
2206
self._add_to_id_index(id_index, key)
2151
2207
self._id_index = id_index
2152
2208
return self._id_index
2210
def _add_to_id_index(self, id_index, entry_key):
2211
"""Add this entry to the _id_index mapping."""
2212
# This code used to use a set for every entry in the id_index. However,
2213
# it is *rare* to have more than one entry. So a set is a large
2214
# overkill. And even when we do, we won't ever have more than the
2215
# number of parent trees. Which is still a small number (rarely >2). As
2216
# such, we use a simple tuple, and do our own uniqueness checks. While
2217
# the 'in' check is O(N) since N is nicely bounded it shouldn't ever
2218
# cause quadratic failure.
2219
# TODO: This should use StaticTuple
2220
file_id = entry_key[2]
2221
entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
2222
if file_id not in id_index:
2223
id_index[file_id] = static_tuple.StaticTuple(entry_key,)
2225
entry_keys = id_index[file_id]
2226
if entry_key not in entry_keys:
2227
id_index[file_id] = entry_keys + (entry_key,)
2229
def _remove_from_id_index(self, id_index, entry_key):
    """Remove this entry from the _id_index mapping.

    It is a programming error to call this when the entry_key is not
    present for that file_id (a ValueError will escape from remove()).

    :param id_index: mapping of file_id -> tuple-like of entry keys
        (as built by _add_to_id_index).
    :param entry_key: the (dirname, basename, file_id) key to remove.
    """
    file_id = entry_key[2]
    # StaticTuple is immutable, so rebuild the sequence without the key.
    entry_keys = list(id_index[file_id])
    entry_keys.remove(entry_key)
    id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
2154
2240
def _get_output_lines(self, lines):
2155
2241
"""Format lines for final output.
2177
2263
return len(self._parents) - len(self._ghosts)
2180
def on_file(path, sha1_provider=None):
2266
def on_file(path, sha1_provider=None, worth_saving_limit=0):
2181
2267
"""Construct a DirState on the file at path "path".
2183
2269
:param path: The path at which the dirstate file on disk should live.
2184
2270
:param sha1_provider: an object meeting the SHA1Provider interface.
2185
2271
If None, a DefaultSHA1Provider is used.
2272
:param worth_saving_limit: when the exact number of hash changed
2273
entries is known, only bother saving the dirstate if more than
2274
this count of entries have changed. -1 means never save.
2186
2275
:return: An unlocked DirState object, associated with the given path.
2188
2277
if sha1_provider is None:
2189
2278
sha1_provider = DefaultSHA1Provider()
2190
result = DirState(path, sha1_provider)
2279
result = DirState(path, sha1_provider,
2280
worth_saving_limit=worth_saving_limit)
2193
2283
def _read_dirblocks_if_needed(self):
2285
2375
trace.mutter('Not saving DirState because '
2286
2376
'_changes_aborted is set.')
2288
if (self._header_state == DirState.IN_MEMORY_MODIFIED or
2289
self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
2378
if self._worth_saving():
2291
2379
grabbed_write_lock = False
2292
2380
if self._lock_state != 'w':
2293
2381
grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
2301
2389
# We couldn't grab a write lock, so we switch back to a read one
2392
lines = self.get_lines()
2304
2393
self._state_file.seek(0)
2305
self._state_file.writelines(self.get_lines())
2394
self._state_file.writelines(lines)
2306
2395
self._state_file.truncate()
2307
2396
self._state_file.flush()
2308
self._header_state = DirState.IN_MEMORY_UNMODIFIED
2309
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
2397
self._mark_unmodified()
2311
2399
if grabbed_write_lock:
2312
2400
self._lock_token = self._lock_token.restore_read_lock()
2315
2403
# not changed contents. Since restore_read_lock may
2316
2404
# not be an atomic operation.
2406
def _worth_saving(self):
2407
"""Is it worth saving the dirstate or not?"""
2408
if self._header_state == DirState.IN_MEMORY_MODIFIED:
2410
if (self._dirblock_state == DirState.IN_MEMORY_MODIFIED and
2411
self._worth_saving_limit != -1):
2412
# If we're using smart saving and only a small number of
2413
# entries have changed their hash, don't bother saving. John has
2414
# suggested using a heuristic here based on the size of the
2415
# changed files and/or tree. For now, we go with a configurable
2416
# number of changes, keeping the calculation time
2417
# as low overhead as possible. (This also keeps all existing
2418
# tests passing as the default is 0, i.e. always save.)
2419
if self._use_smart_saving:
2420
return len(self._known_hash_changes) > self._worth_saving_limit
2318
2425
def _set_data(self, parent_ids, dirblocks):
2319
2426
"""Set the full dirstate data in memory.
2329
2436
# our memory copy is now authoritative.
2330
2437
self._dirblocks = dirblocks
2331
self._header_state = DirState.IN_MEMORY_MODIFIED
2332
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2438
self._mark_modified(header_too=True)
2333
2439
self._parents = list(parent_ids)
2334
2440
self._id_index = None
2335
2441
self._packed_stat_index = None
2355
2461
self._make_absent(entry)
2356
2462
self.update_minimal(('', '', new_id), 'd',
2357
2463
path_utf8='', packed_stat=entry[1][0][4])
2358
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2464
self._mark_modified()
2465
if self._id_index is not None:
2466
self._id_index.setdefault(new_id, set()).add(entry[0])
2360
2468
def set_parent_trees(self, trees, ghosts):
2361
2469
"""Set the parent trees for the dirstate.
2415
2523
by_path[entry[0]] = [entry[1][0]] + \
2416
2524
[DirState.NULL_PARENT_DETAILS] * parent_count
2417
id_index[entry[0][2]] = set([entry[0]])
2525
# TODO: Possibly inline this, since we know it isn't present yet
2526
# id_index[entry[0][2]] = (entry[0],)
2527
self._add_to_id_index(id_index, entry[0])
2419
2529
# now the parent trees:
2420
2530
for tree_index, tree in enumerate(parent_trees):
2442
2552
new_entry_key = (dirname, basename, file_id)
2443
2553
# tree index consistency: All other paths for this id in this tree
2444
2554
# index must point to the correct path.
2445
for entry_key in id_index.setdefault(file_id, set()):
2555
for entry_key in id_index.get(file_id, ()):
2446
2556
# TODO:PROFILING: It might be faster to just update
2447
2557
# rather than checking if we need to, and then overwrite
2448
2558
# the one we are located at.
2454
2564
by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2455
2565
# by path consistency: Insert into an existing path record (trivial), or
2456
2566
# add a new one with relocation pointers for the other tree indexes.
2457
if new_entry_key in id_index[file_id]:
2567
entry_keys = id_index.get(file_id, ())
2568
if new_entry_key in entry_keys:
2458
2569
# there is already an entry where this data belongs, just insert it.
2459
2570
by_path[new_entry_key][tree_index] = \
2460
2571
self._inv_entry_to_details(entry)
2465
2576
new_details = []
2466
2577
for lookup_index in xrange(tree_index):
2467
2578
# boundary case: this is the first occurrence of file_id
2468
# so there are no id_indexs, possibly take this out of
2579
# so there are no id_indexes, possibly take this out of
2470
if not len(id_index[file_id]):
2581
if not len(entry_keys):
2471
2582
new_details.append(DirState.NULL_PARENT_DETAILS)
2473
2584
# grab any one entry, use it to find the right path.
2474
2585
# TODO: optimise this to reduce memory use in highly
2475
2586
# fragmented situations by reusing the relocation
2477
a_key = iter(id_index[file_id]).next()
2588
a_key = iter(entry_keys).next()
2478
2589
if by_path[a_key][lookup_index][0] in ('r', 'a'):
2479
2590
# its a pointer or missing statement, use it as is.
2480
2591
new_details.append(by_path[a_key][lookup_index])
2485
2596
new_details.append(self._inv_entry_to_details(entry))
2486
2597
new_details.extend(new_location_suffix)
2487
2598
by_path[new_entry_key] = new_details
2488
id_index[file_id].add(new_entry_key)
2599
self._add_to_id_index(id_index, new_entry_key)
2489
2600
# --- end generation of full tree mappings
2491
2602
# sort and output all the entries
2493
2604
self._entries_to_current_state(new_entries)
2494
2605
self._parents = [rev_id for rev_id, tree in trees]
2495
2606
self._ghosts = list(ghosts)
2496
self._header_state = DirState.IN_MEMORY_MODIFIED
2497
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2607
self._mark_modified(header_too=True)
2498
2608
self._id_index = id_index
2500
2610
def _sort_entries(self, entry_list):
2637
2747
current_old[0][1].decode('utf8'))
2638
2748
self._make_absent(current_old)
2639
2749
current_old = advance(old_iterator)
2640
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2750
self._mark_modified()
2641
2751
self._id_index = None
2642
2752
self._packed_stat_index = None
2644
2754
trace.mutter("set_state_from_inventory complete.")
2756
def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts):
    """Wipe the currently stored state and set it to something new.

    This is a hard-reset for the data we are working with.

    :param working_inv: the inventory to install as the current tree
        state (via set_state_from_inventory).
    :param parent_trees: parent trees, passed straight through to
        set_parent_trees (assumed (rev_id, tree) pairs — confirm against
        that method's contract).
    :param parent_ghosts: ghost revision ids, also passed through.
    """
    # Technically, we really want a write lock, but until we write, we
    # don't really need it.
    self._requires_lock()
    # set_state_from_inventory needs a root dirblock (plus an empty
    # contents block) to exist before it can operate, so seed one.
    empty_root = (('', '', inventory.ROOT_ID),
                  [('d', '', 0, False, DirState.NULLSTAT)])
    empty_tree_dirblocks = [('', [empty_root]), ('', [])]
    self._set_data([], empty_tree_dirblocks)
    self.set_state_from_inventory(working_inv)
    self.set_parent_trees(parent_trees, parent_ghosts)
2646
2773
def _make_absent(self, current_old):
2647
2774
"""Mark current_old - an entry - as absent for tree 0.
2673
2800
block[1].pop(entry_index)
2674
2801
# if we have an id_index in use, remove this key from it for this id.
2675
2802
if self._id_index is not None:
2676
self._id_index[current_old[0][2]].remove(current_old[0])
2803
self._remove_from_id_index(self._id_index, current_old[0])
2677
2804
# update all remaining keys for this id to record it as absent. The
2678
2805
# existing details may either be the record we are marking as deleted
2679
2806
# (if there were other trees with the id present at this path), or may
2692
2819
if update_tree_details[0][0] == 'a': # absent
2693
2820
raise AssertionError('bad row %r' % (update_tree_details,))
2694
2821
update_tree_details[0] = DirState.NULL_PARENT_DETAILS
2695
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2822
self._mark_modified()
2696
2823
return last_reference
2698
2825
def update_minimal(self, key, minikind, executable=False, fingerprint='',
2750
2877
# new entry, synthesis cross reference here,
2751
existing_keys = id_index.setdefault(key[2], set())
2878
existing_keys = id_index.get(key[2], ())
2752
2879
if not existing_keys:
2753
2880
# not currently in the state, simplest case
2754
2881
new_entry = key, [new_details] + self._empty_parent_info()
2786
2913
other_entry = other_block[other_entry_index]
2787
2914
other_entry[1][0] = ('r', path_utf8, 0, False, '')
2788
self._maybe_remove_row(other_block, other_entry_index,
2915
if self._maybe_remove_row(other_block, other_entry_index,
2917
# If the row holding this was removed, we need to
2918
# recompute where this entry goes
2919
entry_index, _ = self._find_entry_index(key, block)
2792
2922
# adds a tuple to the new details for each column
2794
2924
# - or by creating a new pointer to the right row inside that column
2795
2925
num_present_parents = self._num_present_parents()
2796
2926
if num_present_parents:
2927
# TODO: This re-evaluates the existing_keys set, do we need
2928
# to do that ourselves?
2797
2929
other_key = list(existing_keys)[0]
2798
2930
for lookup_index in xrange(1, num_present_parents + 1):
2799
2931
# grab any one entry, use it to find the right path.
2818
2950
pointer_path = osutils.pathjoin(*other_key[0:2])
2819
2951
new_entry[1].append(('r', pointer_path, 0, False, ''))
2820
2952
block.insert(entry_index, new_entry)
2821
existing_keys.add(key)
2953
self._add_to_id_index(id_index, key)
2823
2955
# Does the new state matter?
2824
2956
block[entry_index][1][0] = new_details
2833
2965
# converted to relocated.
2834
2966
if path_utf8 is None:
2835
2967
raise AssertionError('no path')
2836
for entry_key in id_index.setdefault(key[2], set()):
2968
existing_keys = id_index.get(key[2], ())
2969
if key not in existing_keys:
2970
raise AssertionError('We found the entry in the blocks, but'
2971
' the key is not in the id_index.'
2972
' key: %s, existing_keys: %s' % (key, existing_keys))
2973
for entry_key in existing_keys:
2837
2974
# TODO:PROFILING: It might be faster to just update
2838
2975
# rather than checking if we need to, and then overwrite
2839
2976
# the one we are located at.
2857
2994
if not present:
2858
2995
self._dirblocks.insert(block_index, (subdir_key[0], []))
2860
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2997
self._mark_modified()
2862
2999
def _maybe_remove_row(self, block, index, id_index):
2863
3000
"""Remove index if it is absent or relocated across the row.
2865
3002
id_index is updated accordingly.
3003
:return: True if we removed the row, False otherwise
2867
3005
present_in_row = False
2868
3006
entry = block[index]
3166
3310
entry[1][0] = ('l', '', stat_value.st_size,
3167
3311
False, DirState.NULLSTAT)
3168
state._dirblock_state = DirState.IN_MEMORY_MODIFIED
3312
state._mark_modified([entry])
3169
3313
return link_or_sha1