1
# Copyright (C) 2006 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""DirState objects record the state of a directory and its bzr metadata.
19
Pseudo-EBNF grammar for the disk format:
20
MINIKIND = "f" | "d" | "l";
23
WHOLE NUMBER = {digit}, digit;
25
dirstate format = header line, full checksum, row count, parent details,
27
header line = "#bazaar dirstate flat format 1", NL;
28
full checksum = "adler32: ", ["-"], WHOLE NUMBER, NL;
29
row count = "num_entries: ", digit, NL;
30
parent_details = WHOLE NUMBER, NULL, NL; XXX: complete this line
31
ghost_details = WHOLE NUMBER, NULL, {GHOST_ID NULL}*, NL;
32
rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
33
WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
35
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
36
basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,
39
PARENT ROW's are emitted for every parent that is not in the ghosts details
40
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
41
each row will have a PARENT ROW for foo and baz, but not for bar.
46
1) Fast end to end use for bzr's top 5 uses cases.
47
2) fall back current object model as needed.
48
3) scale usably to the largest trees known today - say 50K entries.
52
Eventually reuse dirstate objects across locks IFF the dirstate file has not
53
been modified, but will require that we flush/ignore cached stat-hit data
54
because we wont want to restat all files on disk just because a lock was
55
acquired, yet we cannot trust the data after the previous lock was released.
57
Memory representation:
58
vector of all directories, and vector of the children ?
60
root_row = (direntry for root, [parent_direntries_for_root]),
62
('', ['data for achild', 'data for bchild', 'data for cchild'])
63
('dir', ['achild', 'cchild', 'echild'])
65
- single bisect to find N subtrees from a path spec
66
- in-order for serialisation - this is 'dirblock' grouping.
67
- insertion of a file '/a' affects only the '/' child-vector, that is, to
68
insert 10K elements from scratch does not generates O(N^2) memoves of a
69
single vector, rather each individual, which tends to be limited to a
70
manageable number. Will scale badly on trees with 10K entries in a
71
single directory. compare with Inventory.InventoryDirectory which has
72
a dictionary for the children. No bisect capability, can only probe for
73
exact matches, or grab all elements and sort.
74
- Whats the risk of error here? Once we have the base format being processed
75
we should have a net win regardless of optimality. So we are going to
76
go with what seems reasonable.
79
maybe we should do a test profile of these core structure - 10K simulated searches/lookups/etc?
82
The lifetime of Dirstate objects is current per lock, but see above for
83
possible extensions. The lifetime of a row from a dirstate is expected to be
84
very short in the optimistic case: which we are optimising for. For instance,
85
subtree status will determine from analysis of the disk data what rows need to
86
be examined at all, and will be able to determine from a single row whether
87
that file has altered or not, so we are aiming to process tens of thousands of
88
entries each second within the dirstate context, before exposing anything to
89
the larger codebase. This suggests we want the time for a single file
90
comparison to be < 0.1 milliseconds. That would give us 10000 paths per second
91
processed, and to scale to 100 thousand we'll another order of magnitude to do
92
that. Now, as the lifetime for all unchanged entries is the time to parse, stat
93
the file on disk, and then immediately discard, the overhead of object creation
94
becomes a significant cost.
96
Figures: Creating a tuple from from 3 elements was profiled at 0.0625
97
microseconds, whereas creating a object which is subclassed from tuple was
98
0.500 microseconds, and creating an object with 3 elements and slots was 3
99
microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get
100
down to 10 microseconds for the total processing - having 33% of that be object
101
creation is a huge overhead. There is a potential cost in using tuples within
102
each row which is that the conditional code to do comparisons may be slower
103
than method invocation, but method invocation is known to be slow due to stack
104
frame creation, so avoiding methods in these tight inner loops in unfortunately
105
desirable. We can consider a pyrex version of this with objects in future if
119
from bzrlib import errors
120
import bzrlib.inventory
121
from bzrlib.osutils import pathjoin, sha_file, sha_string, walkdirs
124
class DirState(object):
125
"""Record directory and metadata state for fast access.
127
A dirstate is a specialised data structure for managing local working
128
tree state information. Its not yet well defined whether it is platform
129
specific, and if it is how we detect/parameterise that.
132
_kind_to_minikind = {'file':'f', 'directory':'d', 'symlink':'l'}
133
_minikind_to_kind = {'f':'file', 'd':'directory', 'l':'symlink'}
134
_to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain
135
# of using int conversion rather than a dict here. AND BLAME ANDREW IF
139
IN_MEMORY_UNMODIFIED = 1
140
IN_MEMORY_MODIFIED = 2
145
"""Create a DirState object.
149
:attr _root_row: The root row of the directory/file information,
150
- contains the path to / - '', ''
151
- kind of 'directory',
152
- the file id of the root in utf8
155
- and no sha information.
157
# _header_state and _dirblock_state represent the current state
158
# of the dirstate metadata and the per-row data respectiely.
159
# NOT_IN_MEMORY indicates that no data is in memory
160
# IN_MEMORY_UNMODIFIED indicates that what we have in memory
161
# is the same as is on disk
162
# IN_MEMORY_MODIFIED indicates that we have a modified version
163
# of what is on disk.
164
# In future we will add more granularity, for instance _dirblock_state
165
# will probably support partially-in-memory as a separate variable,
166
# allowing for partially-in-memory unmodified and partially-in-memory
168
self._header_state = DirState.NOT_IN_MEMORY
169
self._dirblock_state = DirState.NOT_IN_MEMORY
173
self._root_row = None
174
self._state_file=None
176
def add(self, path, file_id, kind, stat, link_or_sha1):
    """Add a path to be tracked.

    :param path: The path within the dirstate - '' is the root, 'foo' is the
        path foo within the root, 'foo/bar' is the path bar within foo
        within the root.
    :param file_id: The file id of the path being added.
    :param kind: The kind of the path, one of 'file', 'directory', 'symlink'.
    :param stat: The output of os.lstat for the path.
    :param link_or_sha1: The sha value of the file, or the target of a
        symlink. '' for directories.
    :raises: errors.NoSuchFile if the parent directory of path has not
        been added already; errors.BzrError for an unknown kind.
    """
    # find the block its in.
    # find the location in the block.
    # check its not there
    # add it.
    self._read_dirblocks_if_needed()
    utf8path = path.encode('utf8')
    dirname, basename = os.path.split(utf8path)
    block_index = bisect.bisect_left(self._dirblocks, (dirname, []))
    if (block_index == len(self._dirblocks) or
        self._dirblocks[block_index][0] != dirname):
        # some parent path has not been added - its an error to add this
        # child
        raise errors.NoSuchFile(path)
    block = self._dirblocks[block_index][1]
    if kind == 'file':
        row_data = ((dirname, basename, kind, file_id.encode('utf8'),
            stat.st_size, pack_stat(stat), link_or_sha1), [])
    elif kind == 'directory':
        # directories carry no size and no sha.
        row_data = ((dirname, basename, kind, file_id.encode('utf8'),
            0, pack_stat(stat), ''), [])
    elif kind == 'symlink':
        row_data = ((dirname, basename, kind, file_id.encode('utf8'),
            stat.st_size, pack_stat(stat), link_or_sha1), [])
    else:
        raise errors.BzrError('unknown kind %r' % kind)
    row_index = bisect.bisect_left(block, row_data)
    if len(block) > row_index:
        # rows are stored as (row_data, parent_list); the basename lives at
        # row_data[1], i.e. block[row_index][0][1]. (Indexing [1] directly
        # would compare the parent list and could never fire.)
        assert block[row_index][0][1] != basename, \
            "basename %r already added" % basename
    block.insert(row_index, row_data)

    if kind == 'directory':
        # insert a new dirblock to hold the new directory's own children
        bisect.insort_left(self._dirblocks, (utf8path, []))
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
225
def add_parent_tree(self, tree_id, tree):
    """Add tree as a parent to this dirstate.

    :param tree_id: The revision id of the tree being added.
    :param tree: The tree itself, or None if the tree is not available
        (a ghost).
    """
    self._read_dirblocks_if_needed()
    self._parents.append(tree_id)
    self._header_state = DirState.IN_MEMORY_MODIFIED
    if tree is None:
        # A ghost parent: record it in the ghost list so no PARENT ROWs are
        # expected for it (see the file format notes in the module
        # docstring). Appending unconditionally would mark every parent as
        # a ghost.
        self._ghosts.append(tree_id)
234
def from_tree(tree, dir_state_filename):
235
"""Create a dirstate from a bzr Tree.
237
:param tree: The tree which should provide parent information and
240
# XXX: aka the big ugly.
242
result._state_file = open(dir_state_filename, 'wb+')
244
_encode = base64.encodestring
246
parent_ids = tree.get_parent_ids()
247
num_parents = len(parent_ids)
249
raise ValueError('Cannot handle more than 3 parents')
252
for parent_id in parent_ids:
253
parent_trees.append(tree.branch.repository.revision_tree(parent_id))
255
# FIXME: is this utf8 safe?
257
to_minikind = DirState._kind_to_minikind
258
to_yesno = DirState._to_yesno
260
st = os.lstat(tree.basedir)
263
, 'directory', tree.inventory.root.file_id.encode('utf8')
264
, 0 # no point having a size for dirs.
269
for parent_tree in parent_trees:
270
root_parents.append((
271
parent_tree.inventory.root.revision.encode('utf8'),
279
root_row = (root_info, root_parents)
281
for dirinfo, block in tree.walkdirs():
282
# dirinfo is path, id
284
# add the row for this block
286
dirblocks.append((dirinfo[0], block_row))
287
for relpath, name, kind, st, fileid, versionedkind in block:
289
# unversioned file, skip
291
# TODO? factor out this loop body as a helper function ?
293
dirname, basename = os.path.split(relpath.encode('utf8'))
295
s = tree.get_file_sha1(fileid, relpath)
296
elif kind == 'directory':
297
if name in ('.bzr', '.hg', 'CVS', '.svn', '_svn'):
298
raise Exception('skipping dirs not supported yet')
299
# Skip this, and all children
300
to_remove.append((relpath, name, kind, st, abspath))
304
elif kind == 'symlink':
305
# sha value of the link target ?!
306
s = os.readlink(abspath)
308
for count in xrange(num_parents):
310
result._parent_info(parent_trees[count], fileid))
311
row_data = (dirname.encode('utf8'), basename.encode('utf8'),
312
kind, fileid.encode('utf8'), st.st_size, pack_stat(st),
314
block_row.append((row_data, parent_info))
316
# It isn't safe to remove entries while we are iterating
317
# over the same list, so remove them now
318
for entry in to_remove:
321
#lines.append(result._get_parents_line(parent_ids))
322
#lines.append(result._get_ghosts_line([]))
323
result._set_data(parent_ids, root_row, dirblocks)
327
def get_ghosts(self):
    """Return a list of the parent tree revision ids that are ghosts."""
    self._read_header_if_needed()
    # restore the return lost in mangling: the ghost list parsed by
    # _read_header.
    return self._ghosts
333
"""Serialise the entire dirstate to a sequence of lines."""
334
if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
335
self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
336
# read whats on disk.
337
self._state_file.seek(0)
338
return self._state_file.readlines()
340
lines.append(self._get_parents_line(self.get_parent_ids()))
341
lines.append(self._get_ghosts_line(self._ghosts))
342
# append the root line which is special cased
343
lines.extend(map(self._row_to_line, self._iter_rows()))
344
return self._get_output_lines(lines)
346
def _get_ghosts_line(self, ghost_ids):
    """Create a line for the state file for ghost information."""
    # format: count, NULL, then each ghost id NULL-separated.
    fields = [str(len(ghost_ids))]
    fields.extend(ghost_ids)
    return '\0'.join(fields)
350
def _get_parents_line(self, parent_ids):
    """Create a line for the state file for parents information."""
    # format: count, NULL, then each parent id NULL-separated.
    fields = [str(len(parent_ids))]
    fields.extend(parent_ids)
    return '\0'.join(fields)
354
def get_parent_ids(self):
    """Return a list of the parent tree ids for the directory state."""
    self._read_header_if_needed()
    # restore the return lost in mangling. A copy is returned so callers
    # cannot mutate our cached parent list.
    return list(self._parents)
360
def initialize(path):
361
"""Create a new dirstate on path.
363
The new dirstate will be an empty tree - that is it has no parents,
364
and only a root node - which has id ROOT_ID.
366
:param path: The name of the file for the dirstate.
367
:return: A DirState object.
369
# This constructs a new DirState object on a path, sets the _state_file
370
# to a new empty file for that path. It then calls _set_data() with our
371
# stock empty dirstate information - a root with ROOT_ID, no children,
372
# and no parents. Finally it calls save() to ensure that this data will
375
result._state_file = open(path, 'wb+')
376
# a new root directory, with a pack_stat (the x's) that is just noise and will
377
# never match the output of base64 encode.
378
root_row_data = ('', '', 'directory', bzrlib.inventory.ROOT_ID, 0,
379
DirState.NULLSTAT, '')
381
root_row = (root_row_data, root_parents)
382
empty_tree_dirblocks = [('', [])] # root dir contents - no entries.
383
result._set_data([], root_row, empty_tree_dirblocks)
387
result._state_file.close()
391
def _iter_rows(self):
    """Iterate over all the row data in the dirstate.

    Each yielded item is a tuple of (row_data, parent_data_list).
    """
    self._read_dirblocks_if_needed()
    # The root row lives outside the dirblocks, so emit it first; the
    # "root line is special cased" comment in get_lines depends on this
    # ordering. NOTE(review): the yield statements were lost in the
    # mangled source and are reconstructed here - confirm against history.
    yield self._root_row
    for directory in self._dirblocks:
        for row in directory[1]:
            yield row
402
def _get_output_lines(self, lines):
    """Format lines for final output.

    :param lines: A sequence of lines containing the parents list and the
        path lines.
    :return: A list of lines: header line, adler32 checksum line,
        num_entries line, then the NULL-joined body.
    """
    output_lines = ['#bazaar dirstate flat format 1\n']
    lines.append('') # a final newline
    inventory_text = '\0\n\0'.join(lines)
    output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))
    # -3, 1 for num parents, 1 for ghosts, 1 for final newline
    num_entries = len(lines)-3
    output_lines.append('num_entries: %s\n' % (num_entries,))
    output_lines.append(inventory_text)
    # restore the return lost in mangling.
    return output_lines
420
"""Construct a DirState on the file at path path."""
422
result._state_file = open(path, 'rb+')
425
def _parent_info(self, parent_tree, fileid):
    """Generate the parent information for file_id in parent_tree.

    :return: A 7-tuple (revision, kind, dirname, basename, size,
        executable, sha1) - a bogus deleted-file entry when fileid is not
        present in parent_tree.
    """
    # FIXME: This is probably expensive - we encode the path that in the
    # common case will be the same across all parents, twice.
    # also, id2path is slow in inventory, and we should make that fast.
    try:
        parent_entry = parent_tree.inventory[fileid]
    except errors.NoSuchId:
        # this parent does not have fileid - return a bogus entry, which
        # will be filtered out on iteration etc.
        # an empty revision id is bogus and safe to put on disk
        # we make it be a 'file', just because. We give it the
        # deleted file path dirname and basename, 0 size, not executable
        return ('', 'file', '/', 'RECYCLED.BIN', 0, False, '')
    parent_path = parent_tree.inventory.id2path(fileid)
    # parent_path is utf8-encoded here; splitting a byte string yields
    # byte strings, so dirname/basename must not be encoded a second time
    # (re-encoding would break on non-ascii paths).
    dirname, basename = os.path.split(parent_path.encode('utf8'))
    return (parent_entry.revision.encode('utf8'),
        parent_entry.kind,
        # FIXME: set these from the parent
        dirname, basename,
        parent_entry.text_size or 0,
        parent_entry.executable,
        parent_entry.text_sha1 or '',
        )
451
def _read_dirblocks_if_needed(self):
452
"""Read in all the dirblocks from the file if they are not in memory."""
453
self._read_header_if_needed()
454
if self._dirblock_state == DirState.NOT_IN_MEMORY:
455
# the _state_file pointer will be positioned at the start of the
457
text = self._state_file.read()
458
# TODO: check the adler checksums. adler_measured = zlib.adler32(text)
460
fields = text.split('\0')
461
# Remove the last blank entry
462
trailing = fields.pop()
463
assert trailing == ''
464
# consider turning fields into a tuple.
466
# skip the first field which is the trailing null from the header.
468
field_count = len(fields)
469
# Each line now has an extra '\n' field which is not used
470
# so we just skip over it
471
# number of fields per dir_entry + number of fields per parent_entry + newline
472
num_parents = len(self._parents)
473
entry_size = 7 + (7 * (num_parents - len(self._ghosts))) + 1
474
expected_field_count = entry_size * self._num_entries
475
# is the file too short ?
476
assert field_count - cur == expected_field_count, \
477
'field count incorrect %s != %s, entry_size=%s, '\
478
'num_entries=%s fields=%r' % (
479
field_count - cur, expected_field_count, entry_size,
480
self._num_entries, fields)
482
# Fast path the case where there are 1 or 2 parents
484
entries = [(fields[pos:pos+7], []) for pos in xrange(cur, field_count, entry_size)]
485
elif num_parents == 1:
486
entries = [(fields[pos:pos+7], [fields[pos+7:pos+14],])
487
for pos in xrange(cur, field_count, entry_size)]
488
elif num_parents == 2:
489
entries = [(fields[pos:pos+7], [
490
fields[pos+7:pos+14],
491
fields[pos+14:pos+21],])
492
for pos in xrange(cur, field_count, entry_size)]
496
tuple([fields[chunk:chunk+7] for
497
chunk in xrange(pos + 7, pos+entry_size-1, 7)]))
498
for pos in xrange(cur, field_count, entry_size)
501
assert len(entries) == self._num_entries, '%s != %s entries' % (len(entries),
504
def _line_to_row(line):
505
"""Convert a freshly read line's size and minikind for use."""
506
# convert the minikind to kind
507
line[0][2] = self._minikind_to_kind[line[0][2]]
508
# convert the size to an int
509
line[0][4] = int(line[0][4])
510
for parent in line[1]:
511
parent[1] = self._minikind_to_kind[parent[1]]
512
parent[4] = int(parent[4])
513
parent[5] = parent[5] == 'y'
514
return tuple(line[0]), map(tuple, line[1])
515
new_rows = map(_line_to_row, entries)
516
self._rows_to_current_state(new_rows)
517
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
519
def _read_header(self):
    """This reads in the metadata header, and the parent ids.

    After reading in, the file should be positioned at the null
    just before the start of the first record in the file.

    :return: (expected adler checksum, number of entries, parent list)
    """
    # NOTE(review): the prelude call was lost in the mangled source and is
    # reconstructed here; without it the first readline below would consume
    # the '#bazaar dirstate flat format 1' header rather than the parents
    # line - confirm against history.
    self._read_prelude()
    parent_line = self._state_file.readline()
    info = parent_line.split('\0')
    num_parents = int(info[0])
    assert num_parents == len(info)-2, 'incorrect parent info line'
    self._parents = [p.decode('utf8') for p in info[1:-1]]

    ghost_line = self._state_file.readline()
    info = ghost_line.split('\0')
    num_ghosts = int(info[1])
    assert num_ghosts == len(info)-3, 'incorrect ghost info line'
    self._ghosts = [p.decode('utf8') for p in info[2:-1]]
    self._header_state = DirState.IN_MEMORY_UNMODIFIED
541
def _read_header_if_needed(self):
    """Read the header of the dirstate file if needed."""
    # restore the call lost in mangling: only hit the disk when nothing is
    # in memory yet.
    if self._header_state == DirState.NOT_IN_MEMORY:
        self._read_header()
546
def _read_prelude(self):
    """Read in the prelude header of the dirstate file.

    This only reads in the stuff that is not connected to the adler
    checksum. The position will be correct to read in the rest of
    the file and check the checksum after this point.
    The next entry in the file should be the number of parents,
    and their ids. Followed by a newline.
    """
    readline = self._state_file.readline
    header = readline()
    assert header == '#bazaar dirstate flat format 1\n', \
        'invalid header line: %r' % (header,)
    checksum_prefix = 'adler32: '
    checksum_line = readline()
    assert checksum_line.startswith(checksum_prefix), 'missing adler32 checksum'
    # the stored checksum for later validation against the body text.
    self.adler_expected = int(checksum_line[len(checksum_prefix):-1])
    count_prefix = 'num_entries: '
    count_line = readline()
    assert count_line.startswith(count_prefix), 'missing num_entries line'
    self._num_entries = int(count_line[len(count_prefix):-1])
565
def _row_to_line(self, row):
    """Serialize row to a NULL delimited line ready for _get_output_lines.

    :param row: A row_tuple as defined in the module docstring.
    """
    minikinds = DirState._kind_to_minikind
    yesno = DirState._to_yesno
    # conversion from memory to disk-ready format for the current row:
    # minikind at index 2, stringified size at index 4.
    fields = list(row[0])
    fields[2] = minikinds[fields[2]]
    fields[4] = str(fields[4])
    # each parent contributes seven more fields:
    # (revision, kind, dirname, basename, size, executable_bool, sha1)
    # with kind, size and executable converted to their disk forms.
    for parent_data in row[1]:
        converted = list(parent_data)
        converted[1] = minikinds[converted[1]]
        converted[4] = str(converted[4])
        converted[5] = yesno[converted[5]]
        fields.extend(converted)
    return '\0'.join(fields)
586
def _rows_to_current_state(self, new_rows):
    """Load new_rows into self._root_row and self._dirblocks.

    Process new_rows into the current state object, making them the active
    state.

    :param new_rows: A sorted list of rows. This function does not sort
        to prevent unneeded overhead when callers have a sorted list
        already.
    :return: None
    """
    assert new_rows[0][0][0:2] == ('', ''), \
        "Incorrect root row %r" % new_rows[0][0]
    self._root_row = new_rows[0]
    self._dirblocks = [('', [])]
    for row in new_rows[1:]:
        # Compare the row's dirname (row[0][0]) with the current block's
        # name. (Comparing the whole row tuple against the name string
        # was a bug: always unequal, so every row opened a fresh block.)
        if row[0][0] != self._dirblocks[-1][0]:
            # new directory - start a new block
            self._dirblocks.append((row[0][0], []))
        # append the row to the current block
        self._dirblocks[-1][1].append(row)
609
"""Save any pending changes created during this session."""
610
if (self._header_state == DirState.IN_MEMORY_MODIFIED or
611
self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
612
self._state_file.seek(0)
613
self._state_file.writelines(self.get_lines())
614
self._state_file.flush()
615
self._state_file.truncate()
616
self._header_state = DirState.IN_MEMORY_UNMODIFIED
617
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
619
def _set_data(self, parent_ids, root_row, dirblocks):
    """Set the full dirstate data in memory.

    This is an internal function used to completely replace the objects
    in memory state. It puts the dirstate into state 'full-dirty'.

    :param parent_ids: A list of parent tree revision ids.
    :param root_row: The root row - a tuple of the root direntry and the
        list of matching direntries from the parent_ids trees.
    :param dirblocks: A list containing one tuple for each directory in the
        tree. Each tuple contains the directory path and a list of
        row data in the same format as root_row.
    """
    # our memory copy is now authoritative.
    self._parents = list(parent_ids)
    self._root_row = root_row
    self._dirblocks = dirblocks
    # both the header and the blocks now differ from whatever is on disk.
    self._header_state = DirState.IN_MEMORY_MODIFIED
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
639
def set_path_id(self, path, new_id):
    """Change the id of path to new_id.

    :param path: The path inside the tree to set - '' is the root, 'foo'
        is the path foo in the root.
    :param new_id: The new id to assign to the path. If unicode, it will
        be encoded to utf8. In future this will be deprecated: avoid using
        unicode ids if possible.
    """
    self._read_header_if_needed()
    # only the root-with-no-parents case is implemented so far.
    if len(path) or len(self._parents):
        raise NotImplementedError(self.set_path_id)
    self._read_dirblocks_if_needed()
    if new_id.__class__ == unicode:
        new_id = new_id.encode('utf8')
    root_info, root_parents = self._root_row
    # splice the new id into position 3 of the root direntry, keeping the
    # parent details unchanged.
    new_root_info = root_info[0:3] + (new_id, ) + root_info[4:7]
    self._root_row = (new_root_info, root_parents)
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
659
def set_parent_trees(self, trees, ghosts):
660
"""Set the parent trees for the dirstate.
662
:param trees: A list of revision_id, tree tuples. tree must be provided
663
even if the revision_id refers to a ghost: supply an empty tree in
665
:param ghosts: A list of the revision_ids that are ghosts at the time
668
# TODO: generate a list of parent indexes to preserve to save
669
# processing specific parent trees. In the common case one tree will
670
# be preserved - the left most parent.
671
# TODO: if the parent tree is a dirstate, we might want to walk them
672
# all by path in parallel for 'optimal' common-case performance.
673
# generate new root row.
674
self._read_dirblocks_if_needed()
675
old_root = self._root_row
676
root_info = self._root_row[0]
677
new_parent_count = len(trees)
678
# sketch: loop over all rows in the dirstate, cherry picking
679
# entries from the parent trees, if they are not ghosts.
680
# after we finish walking the dirstate, all entries not in the dirstate
681
# are deletes, so we want to append them to the end as per the design
682
# discussions. So do a set difference on ids with the parents to
683
# get deletes, and add them to the end.
685
# skip ghost trees, as they dont get represented.
686
parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
687
parent_tree_count = len(parent_trees)
688
# loop over existing entries in the dirstate.
690
for entry, old_parents in self._iter_rows():
692
checked_ids.add(file_id)
693
new_parents = [None] * parent_tree_count
694
for position, parent_tree in enumerate(parent_trees):
695
# revision_utf8, KIND, dirname, basename, size, executable, sha
696
new_parents[position] = self._parent_info(parent_tree, file_id)
697
assert None not in new_parents
698
new_rows.append((entry, new_parents))
699
# get additional ids that are present in parents and not in this tree.
701
for tree in parent_trees:
702
deleted_ids.update(set(tree.inventory._byid).difference(checked_ids))
703
# add the deleted ids to the dirstate. deleted files are represented as
704
# a file with dirname '', basename ''
705
for file_id in deleted_ids:
706
# add these ids to the deleted block
707
checked_ids.add(file_id)
708
# deleted items have a synthetic entry.
709
entry = ('/', 'RECYCLED.BIN', 'file', file_id.encode('utf8'), 0,
710
DirState.NULLSTAT, '')
711
new_parents = [None] * parent_tree_count
712
for position, parent_tree in enumerate(parent_trees):
713
# revision_utf8, KIND, dirname, basename, size, executable, sha
714
new_parents[position] = self._parent_info(parent_tree, file_id)
715
assert None not in new_parents
716
new_rows.append((entry, new_parents))
719
new_rows = sorted(new_rows)
720
self._rows_to_current_state(new_rows)
721
self._parents = [rev_id for rev_id, tree in trees]
722
self._ghosts = list(ghosts)
723
self._header_state = DirState.IN_MEMORY_MODIFIED
724
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
726
def set_state_from_inventory(self, new_inv):
727
"""Set new_inv as the current state.
729
:param new_inv: The inventory object to set current state from.
731
self._read_dirblocks_if_needed()
733
# generate a byid index of the dirstate
735
for row, parents in self._iter_rows():
736
parent_rows[row[3]] = parents
737
# walk the new inventory in directory order, copying parent data
740
for path, entry in new_inv.iter_entries_by_dir():
741
dirname, basename = os.path.split(path.encode('utf8'))
743
fileid_utf8 = entry.file_id.encode('utf8')
745
size = entry.text_size or 0
746
sha1 = entry.text_sha1 or ''
747
elif kind == 'symlink':
749
sha1 = (entry.symlink_target or '').encode('utf8')
754
parents = parent_rows[fileid_utf8]
755
del parent_rows[fileid_utf8]
758
new_row = (dirname, basename, kind, fileid_utf8, size, DirState.NULLSTAT, sha1), parents
759
new_rows.append(new_row)
760
# append deleted data to the end of the tree as usual.
761
for fileid_utf8, parents in parent_rows.items():
763
# this row was only present in the old state, had no parents
765
# deleted items have a synthetic entry.
766
new_row = ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0,
767
DirState.NULLSTAT, ''), parents
768
new_rows.append(new_row)
770
# sort all the rows (the ones in parents not in current may be unsorted)
771
new_rows = sorted(new_rows)
772
self._rows_to_current_state(new_rows)
773
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
776
def pack_stat(st, _encode=base64.b64encode, _pack=struct.pack):
    """Convert stat values into a packed representation.

    :param st: An os.lstat-style result with st_size, st_mtime, st_ctime,
        st_dev, st_ino and st_mode attributes.
    :param _encode: Private parameter: the encoder applied to the packed
        bytes.
    :param _pack: Private parameter: the struct packer for the six fields.
    :return: A short base64 string encoding the six stat fields.
    """
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin
    # b64encode (unlike the removed-in-3.9 base64.encodestring) adds no
    # trailing newline, so no [:-1] slicing is needed; the output bytes are
    # identical for this 24-byte input. The timestamps are coerced to int
    # because '>l' only packs integers on modern Pythons, matching the
    # historical truncating behaviour.
    return _encode(_pack('>llllll'
        , st.st_size, int(st.st_mtime), int(st.st_ctime)
        , st.st_dev, st.st_ino, st.st_mode))