36
48
An InventoryEntry has the following fields, which are also
37
49
present in the XML inventory-entry element:
40
* *name*: (only the basename within the directory, must not
42
* *kind*: "directory" or "file"
43
* *directory_id*: (if absent/null means the branch root directory)
44
* *text_sha1*: only for files
45
* *text_size*: in bytes, only for files
46
* *text_id*: identifier for the text version, only for files
48
InventoryEntries can also exist inside a WorkingTree
49
inventory, in which case they are not yet bound to a
50
particular revision of the file. In that case the text_sha1,
51
text_size and text_id are absent.
54
(within the parent directory)
57
file_id of the parent directory, or ROOT_ID
60
the revision_id in which this variation of this file was
64
Indicates that this file should be executable on systems
68
sha-1 of the text of the file
71
size in bytes of the text of the file
73
(reading a version 4 tree created a text_id field.)
54
75
>>> i = Inventory()
57
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
58
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
59
>>> for j in i.iter_entries():
78
>>> i.add(InventoryDirectory('123', 'src', ROOT_ID))
79
InventoryDirectory('123', 'src', parent_id='TREE_ROOT')
80
>>> i.add(InventoryFile('2323', 'hello.c', parent_id='123'))
81
InventoryFile('2323', 'hello.c', parent_id='123')
82
>>> shouldbe = {0: 'src', 1: pathjoin('src','hello.c')}
83
>>> for ix, j in enumerate(i.iter_entries()):
84
... print (j[0] == shouldbe[ix], j[1])
62
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
63
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
64
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
86
(True, InventoryDirectory('123', 'src', parent_id='TREE_ROOT'))
87
(True, InventoryFile('2323', 'hello.c', parent_id='123'))
88
>>> i.add(InventoryFile('2323', 'bye.c', '123'))
65
89
Traceback (most recent call last):
67
91
BzrError: inventory already contains entry with id {2323}
68
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
69
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
92
>>> i.add(InventoryFile('2324', 'bye.c', '123'))
93
InventoryFile('2324', 'bye.c', parent_id='123')
94
>>> i.add(InventoryDirectory('2325', 'wibble', '123'))
95
InventoryDirectory('2325', 'wibble', parent_id='123')
70
96
>>> i.path2id('src/wibble')
74
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
100
>>> i.add(InventoryFile('2326', 'wibble.c', '2325'))
101
InventoryFile('2326', 'wibble.c', parent_id='2325')
76
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
77
>>> for j in i.iter_entries():
79
... assert i.path2id(j[0])
103
InventoryFile('2326', 'wibble.c', parent_id='2325')
104
>>> for path, entry in i.iter_entries():
106
... assert i.path2id(path)
85
112
src/wibble/wibble.c
86
113
>>> i.id2path('2326')
87
114
'src/wibble/wibble.c'
89
TODO: Maybe also keep the full path of the entry, and the children?
90
But those depend on its position within a particular inventory, and
91
it would be nice not to need to hold the backpointer here.
94
# TODO: split InventoryEntry into subclasses for files,
95
# directories, etc etc.
97
117
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
98
'text_id', 'parent_id', 'children', ]
100
def __init__(self, file_id, name, kind, parent_id, text_id=None):
118
'text_id', 'parent_id', 'children', 'executable',
121
def _add_text_to_weave(self, new_lines, parents, weave_store, transaction):
122
versionedfile = weave_store.get_weave_or_empty(self.file_id,
124
versionedfile.add_lines(self.revision, parents, new_lines)
125
versionedfile.clear_cache()
127
def detect_changes(self, old_entry):
128
"""Return a (text_modified, meta_modified) from this to old_entry.
130
_read_tree_state must have been called on self and old_entry prior to
131
calling detect_changes.
135
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
136
output_to, reverse=False):
137
"""Perform a diff from this to to_entry.
139
text_diff will be used for textual difference calculation.
140
This is a template method, override _diff in child classes.
142
self._read_tree_state(tree.id2path(self.file_id), tree)
144
# cannot diff from one kind to another - you must do a removal
145
# and an add if they do not match.
146
assert self.kind == to_entry.kind
147
to_entry._read_tree_state(to_tree.id2path(to_entry.file_id),
149
self._diff(text_diff, from_label, tree, to_label, to_entry, to_tree,
152
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
153
output_to, reverse=False):
154
"""Perform a diff between two entries of the same kind."""
156
def find_previous_heads(self, previous_inventories,
157
versioned_file_store,
160
"""Return the revisions and entries that directly precede this.
162
Returned as a map from revision to inventory entry.
164
This is a map containing the file revisions in all parents
165
for which the file exists, and its revision is not a parent of
166
any other. If the file is new, the set will be empty.
168
:param versioned_file_store: A store where ancestry data on this
169
file id can be queried.
170
:param transaction: The transaction that queries to the versioned
171
file store should be completed under.
172
:param entry_vf: The entry versioned file, if it's already available.
174
def get_ancestors(weave, entry):
175
return set(weave.get_ancestry(entry.revision))
176
# revision:ie mapping for each ie found in previous_inventories.
178
# revision:ie mapping with one revision for each head.
180
# revision: ancestor list for each head
182
# identify candidate head revision ids.
183
for inv in previous_inventories:
184
if self.file_id in inv:
185
ie = inv[self.file_id]
186
assert ie.file_id == self.file_id
187
if ie.revision in candidates:
188
# same revision value in two different inventories:
189
# correct possible inconsistencies:
190
# * there was a bug in revision updates with 'x' bit
193
if candidates[ie.revision].executable != ie.executable:
194
candidates[ie.revision].executable = False
195
ie.executable = False
196
except AttributeError:
198
# must now be the same.
199
assert candidates[ie.revision] == ie
201
# add this revision as a candidate.
202
candidates[ie.revision] = ie
204
# common case optimisation
205
if len(candidates) == 1:
206
# if there is only one candidate revision found
207
# then we can avoid opening the versioned file to access ancestry:
208
# there cannot be any ancestors to eliminate when there is
209
# only one revision available.
210
heads[ie.revision] = ie
213
# eliminate ancestors amongst the available candidates:
214
# heads are those that are not an ancestor of any other candidate
215
# - this provides convergence at a per-file level.
216
for ie in candidates.values():
217
# may be an ancestor of a known head:
218
already_present = 0 != len(
219
[head for head in heads
220
if ie.revision in head_ancestors[head]])
222
# an ancestor of an analyzed candidate.
224
# not an ancestor of a known head:
225
# load the versioned file for this file id if needed
227
entry_vf = versioned_file_store.get_weave_or_empty(
228
self.file_id, transaction)
229
ancestors = get_ancestors(entry_vf, ie)
230
# may knock something else out:
231
check_heads = list(heads.keys())
232
for head in check_heads:
233
if head in ancestors:
234
# this previously discovered 'head' is not
235
# really a head - it's an ancestor of the newly
238
head_ancestors[ie.revision] = ancestors
239
heads[ie.revision] = ie
242
def get_tar_item(self, root, dp, now, tree):
243
"""Get a tarfile item and a file stream for its content."""
244
item = tarfile.TarInfo(pathjoin(root, dp))
245
# TODO: would be cool to actually set it to the timestamp of the
246
# revision it was last changed
248
fileobj = self._put_in_tar(item, tree)
252
"""Return true if the object this entry represents has textual data.
254
Note that textual data includes binary content.
256
Also note that all entries get weave files created for them.
257
This attribute is primarily used when upgrading from old trees that
258
did not have the weave index for all inventory entries.
262
def __init__(self, file_id, name, parent_id, text_id=None):
101
263
"""Create an InventoryEntry
103
265
The filename must be a single component, relative to the
104
266
parent directory; it cannot be a whole path or relative name.
106
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
268
>>> e = InventoryFile('123', 'hello.c', ROOT_ID)
111
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
273
>>> e = InventoryFile('123', 'src/hello.c', ROOT_ID)
112
274
Traceback (most recent call last):
113
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
275
InvalidEntryName: Invalid entry name: src/hello.c
277
assert isinstance(name, basestring), name
115
278
if '/' in name or '\\' in name:
116
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
279
raise InvalidEntryName(name=name)
280
self.executable = False
118
282
self.text_sha1 = None
119
283
self.text_size = None
121
284
self.file_id = file_id
124
286
self.text_id = text_id
125
287
self.parent_id = parent_id
126
if kind == 'directory':
131
raise BzrError("unhandled entry kind %r" % kind)
288
self.symlink_target = None
290
def kind_character(self):
    """Return a short kind indicator useful for appending to names.

    This generic implementation has no indicator to offer: it always
    raises BzrError naming self.kind as unknown.

    :raises BzrError: always.
    """
    raise BzrError('unknown kind %r' % self.kind)
294
known_kinds = ('file', 'directory', 'symlink', 'root_directory')
296
def _put_in_tar(self, item, tree):
297
"""populate item for stashing in a tar, and return the content stream.
299
If no content is available, return None.
301
raise BzrError("don't know how to export {%s} of kind %r" %
302
(self.file_id, self.kind))
304
def put_on_disk(self, dest, dp, tree):
305
"""Create a representation of self on disk in the prefix dest.
307
This is a template method - implement _put_on_disk in subclasses.
309
fullpath = pathjoin(dest, dp)
310
self._put_on_disk(fullpath, tree)
311
mutter(" export {%s} kind %s to %s", self.file_id,
314
def _put_on_disk(self, fullpath, tree):
    """Put this entry onto disk at fullpath, from tree tree.

    This generic implementation does not know how to export anything and
    always raises.

    :param fullpath: Destination path for the exported entry (unused here).
    :param tree: Tree the entry's content would come from (unused here).
    :raises BzrError: always, reporting self.file_id and self.kind.
    """
    raise BzrError("don't know how to export {%s} of kind %r"
                   % (self.file_id, self.kind))
135
318
def sorted_children(self):
136
319
l = self.children.items()
324
def versionable_kind(kind):
    """Return True if *kind* names an entry kind that can be versioned.

    The versionable kinds are 'file', 'directory' and 'symlink'.

    :param kind: Kind name to test.
    """
    # NOTE(review): takes no self; presumably decorated as a staticmethod on
    # a line outside this view - confirm before relying on the call style.
    return kind in ('file', 'directory', 'symlink')
327
def check(self, checker, rev_id, inv, tree):
328
"""Check this inventory entry is intact.
330
This is a template method, override _check for kind specific
333
:param checker: Check object providing context for the checks;
334
can be used to find out what parts of the repository have already
336
:param rev_id: Revision id from which this InventoryEntry was loaded.
337
Not necessarily the last-changed revision for this file.
338
:param inv: Inventory from which the entry was loaded.
339
:param tree: RevisionTree for this entry.
341
if self.parent_id != None:
342
if not inv.has_id(self.parent_id):
343
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
344
% (self.parent_id, rev_id))
345
self._check(checker, rev_id, tree)
347
def _check(self, checker, rev_id, tree):
348
"""Check this inventory entry for kind specific errors."""
349
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
142
other = InventoryEntry(self.file_id, self.name, self.kind,
143
self.parent_id, text_id=self.text_id)
144
other.text_sha1 = self.text_sha1
145
other.text_size = self.text_size
146
# note that children are *not* copied; they're pulled across when
354
"""Clone this inventory entry."""
355
raise NotImplementedError
357
def _get_snapshot_change(self, previous_entries):
358
if len(previous_entries) > 1:
360
elif len(previous_entries) == 0:
363
return 'modified/renamed/reparented'
151
365
def __repr__(self):
152
return ("%s(%r, %r, kind=%r, parent_id=%r)"
366
return ("%s(%r, %r, parent_id=%r)"
153
367
% (self.__class__.__name__,
160
def to_element(self):
161
"""Convert to XML element"""
162
from bzrlib.xml import Element
166
e.set('name', self.name)
167
e.set('file_id', self.file_id)
168
e.set('kind', self.kind)
170
if self.text_size != None:
171
e.set('text_size', '%d' % self.text_size)
173
for f in ['text_id', 'text_sha1']:
178
# to be conservative, we don't externalize the root pointers
179
# for now, leaving them as null in the xml form. in a future
180
# version it will be implied by nested elements.
181
if self.parent_id != ROOT_ID:
182
assert isinstance(self.parent_id, basestring)
183
e.set('parent_id', self.parent_id)
190
def from_element(cls, elt):
191
assert elt.tag == 'entry'
193
## original format inventories don't have a parent_id for
194
## nodes in the root directory, but it's cleaner to use one
196
parent_id = elt.get('parent_id')
197
if parent_id == None:
200
self = cls(elt.get('file_id'), elt.get('name'), elt.get('kind'), parent_id)
201
self.text_id = elt.get('text_id')
202
self.text_sha1 = elt.get('text_sha1')
204
## mutter("read inventoryentry: %r" % (elt.attrib))
206
v = elt.get('text_size')
207
self.text_size = v and int(v)
212
from_element = classmethod(from_element)
372
def snapshot(self, revision, path, previous_entries,
373
work_tree, weave_store, transaction):
374
"""Make a snapshot of this entry which may or may not have changed.
376
This means that all its fields are populated, that it has its
377
text stored in the text store or weave.
379
mutter('new parents of %s are %r', path, previous_entries)
380
self._read_tree_state(path, work_tree)
381
if len(previous_entries) == 1:
382
# cannot be unchanged unless there is only one parent file rev.
383
parent_ie = previous_entries.values()[0]
384
if self._unchanged(parent_ie):
385
mutter("found unchanged entry")
386
self.revision = parent_ie.revision
388
return self.snapshot_revision(revision, previous_entries,
389
work_tree, weave_store, transaction)
391
def snapshot_revision(self, revision, previous_entries, work_tree,
392
weave_store, transaction):
393
"""Record this revision unconditionally."""
394
mutter('new revision for {%s}', self.file_id)
395
self.revision = revision
396
change = self._get_snapshot_change(previous_entries)
397
self._snapshot_text(previous_entries, work_tree, weave_store,
401
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
402
"""Record the 'text' of this entry, whatever form that takes.
404
This default implementation simply adds an empty text.
406
mutter('storing file {%s} in revision {%s}',
407
self.file_id, self.revision)
408
self._add_text_to_weave([], file_parents.keys(), weave_store, transaction)
214
410
def __eq__(self, other):
215
411
if not isinstance(other, InventoryEntry):
216
412
return NotImplemented
218
return (self.file_id == other.file_id) \
219
and (self.name == other.name) \
220
and (self.text_sha1 == other.text_sha1) \
221
and (self.text_size == other.text_size) \
222
and (self.text_id == other.text_id) \
223
and (self.parent_id == other.parent_id) \
224
and (self.kind == other.kind)
414
return ((self.file_id == other.file_id)
415
and (self.name == other.name)
416
and (other.symlink_target == self.symlink_target)
417
and (self.text_sha1 == other.text_sha1)
418
and (self.text_size == other.text_size)
419
and (self.text_id == other.text_id)
420
and (self.parent_id == other.parent_id)
421
and (self.kind == other.kind)
422
and (self.revision == other.revision)
423
and (self.executable == other.executable)
227
426
def __ne__(self, other):
228
427
return not (self == other)
248
478
and (self.children == other.children)
481
class InventoryDirectory(InventoryEntry):
482
"""A directory in an inventory."""
484
def _check(self, checker, rev_id, tree):
485
"""See InventoryEntry._check"""
486
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
487
raise BzrCheckError('directory {%s} has text in revision {%s}'
488
% (self.file_id, rev_id))
491
other = InventoryDirectory(self.file_id, self.name, self.parent_id)
492
other.revision = self.revision
493
# note that children are *not* copied; they're pulled across when
497
def __init__(self, file_id, name, parent_id):
498
super(InventoryDirectory, self).__init__(file_id, name, parent_id)
500
self.kind = 'directory'
502
def kind_character(self):
503
"""See InventoryEntry.kind_character."""
506
def _put_in_tar(self, item, tree):
507
"""See InventoryEntry._put_in_tar."""
508
item.type = tarfile.DIRTYPE
515
def _put_on_disk(self, fullpath, tree):
516
"""See InventoryEntry._put_on_disk."""
520
class InventoryFile(InventoryEntry):
521
"""A file in an inventory."""
523
def _check(self, checker, tree_revision_id, tree):
524
"""See InventoryEntry._check"""
525
t = (self.file_id, self.revision)
526
if t in checker.checked_texts:
527
prev_sha = checker.checked_texts[t]
528
if prev_sha != self.text_sha1:
529
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
530
(self.file_id, tree_revision_id))
532
checker.repeated_text_cnt += 1
535
if self.file_id not in checker.checked_weaves:
536
mutter('check weave {%s}', self.file_id)
537
w = tree.get_weave(self.file_id)
538
# Not passing a progress bar, because it creates a new
539
# progress, which overwrites the current progress,
540
# and doesn't look nice
542
checker.checked_weaves[self.file_id] = True
544
w = tree.get_weave(self.file_id)
546
mutter('check version {%s} of {%s}', tree_revision_id, self.file_id)
547
checker.checked_text_cnt += 1
548
# We can't check the length, because Weave doesn't store that
549
# information, and the whole point of looking at the weave's
550
# sha1sum is that we don't have to extract the text.
551
if self.text_sha1 != w.get_sha1(self.revision):
552
raise BzrCheckError('text {%s} version {%s} wrong sha1'
553
% (self.file_id, self.revision))
554
checker.checked_texts[t] = self.text_sha1
557
other = InventoryFile(self.file_id, self.name, self.parent_id)
558
other.executable = self.executable
559
other.text_id = self.text_id
560
other.text_sha1 = self.text_sha1
561
other.text_size = self.text_size
562
other.revision = self.revision
565
def detect_changes(self, old_entry):
    """See InventoryEntry.detect_changes.

    Both entries must already have text_sha1 populated; this is asserted.

    :param old_entry: The entry to compare against.
    :return: (text_modified, meta_modified); the text is modified when the
        sha1s differ, the metadata when the executable flags differ.
    """
    assert self.text_sha1 is not None
    assert old_entry.text_sha1 is not None
    text_modified = (self.text_sha1 != old_entry.text_sha1)
    meta_modified = (self.executable != old_entry.executable)
    return text_modified, meta_modified
573
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
574
output_to, reverse=False):
575
"""See InventoryEntry._diff."""
577
from_text = tree.get_file(self.file_id).readlines()
579
to_text = to_tree.get_file(to_entry.file_id).readlines()
583
text_diff(from_label, from_text,
584
to_label, to_text, output_to)
586
text_diff(to_label, to_text,
587
from_label, from_text, output_to)
590
label_pair = (to_label, from_label)
592
label_pair = (from_label, to_label)
593
print >> output_to, "Binary files %s and %s differ" % label_pair
596
"""See InventoryEntry.has_text."""
599
def __init__(self, file_id, name, parent_id):
600
super(InventoryFile, self).__init__(file_id, name, parent_id)
603
def kind_character(self):
604
"""See InventoryEntry.kind_character."""
607
def _put_in_tar(self, item, tree):
608
"""See InventoryEntry._put_in_tar."""
609
item.type = tarfile.REGTYPE
610
fileobj = tree.get_file(self.file_id)
611
item.size = self.text_size
612
if tree.is_executable(self.file_id):
618
def _put_on_disk(self, fullpath, tree):
619
"""See InventoryEntry._put_on_disk."""
620
pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
621
if tree.is_executable(self.file_id):
622
os.chmod(fullpath, 0755)
624
def _read_tree_state(self, path, work_tree):
625
"""See InventoryEntry._read_tree_state."""
626
self.text_sha1 = work_tree.get_file_sha1(self.file_id)
627
self.executable = work_tree.is_executable(self.file_id)
629
def _forget_tree_state(self):
630
self.text_sha1 = None
631
self.executable = None
633
def _snapshot_text(self, file_parents, work_tree, weave_store, transaction):
634
"""See InventoryEntry._snapshot_text."""
635
mutter('storing file {%s} in revision {%s}',
636
self.file_id, self.revision)
637
# special case to avoid diffing on renames or
639
if (len(file_parents) == 1
640
and self.text_sha1 == file_parents.values()[0].text_sha1
641
and self.text_size == file_parents.values()[0].text_size):
642
previous_ie = file_parents.values()[0]
643
versionedfile = weave_store.get_weave(self.file_id, transaction)
644
versionedfile.clone_text(self.revision, previous_ie.revision, file_parents.keys())
646
new_lines = work_tree.get_file(self.file_id).readlines()
647
self._add_text_to_weave(new_lines, file_parents.keys(), weave_store,
649
self.text_sha1 = sha_strings(new_lines)
650
self.text_size = sum(map(len, new_lines))
653
def _unchanged(self, previous_ie):
654
"""See InventoryEntry._unchanged."""
655
compatible = super(InventoryFile, self)._unchanged(previous_ie)
656
if self.text_sha1 != previous_ie.text_sha1:
659
# FIXME: 20050930 probe for the text size when getting sha1
660
# in _read_tree_state
661
self.text_size = previous_ie.text_size
662
if self.executable != previous_ie.executable:
667
class InventoryLink(InventoryEntry):
668
"""A symbolic link in an inventory."""
670
__slots__ = ['symlink_target']
672
def _check(self, checker, rev_id, tree):
673
"""See InventoryEntry._check"""
674
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
675
raise BzrCheckError('symlink {%s} has text in revision {%s}'
676
% (self.file_id, rev_id))
677
if self.symlink_target == None:
678
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
679
% (self.file_id, rev_id))
682
other = InventoryLink(self.file_id, self.name, self.parent_id)
683
other.symlink_target = self.symlink_target
684
other.revision = self.revision
687
def detect_changes(self, old_entry):
688
"""See InventoryEntry.detect_changes."""
689
# FIXME: which _modified field should we use ? RBC 20051003
690
text_modified = (self.symlink_target != old_entry.symlink_target)
692
mutter(" symlink target changed")
693
meta_modified = False
694
return text_modified, meta_modified
696
def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
697
output_to, reverse=False):
698
"""See InventoryEntry._diff."""
699
from_text = self.symlink_target
700
if to_entry is not None:
701
to_text = to_entry.symlink_target
706
print >>output_to, '=== target changed %r => %r' % (from_text, to_text)
709
print >>output_to, '=== target was %r' % self.symlink_target
711
print >>output_to, '=== target is %r' % self.symlink_target
713
def __init__(self, file_id, name, parent_id):
    """Create a symlink entry.

    Delegates to InventoryEntry.__init__ with the same arguments and then
    sets kind to 'symlink'.

    :param file_id: Identifier passed through to the base initialiser.
    :param name: Entry name passed through to the base initialiser.
    :param parent_id: Parent identifier passed through to the base
        initialiser.
    """
    super(InventoryLink, self).__init__(file_id, name, parent_id)
    self.kind = 'symlink'
717
def kind_character(self):
718
"""See InventoryEntry.kind_character."""
721
def _put_in_tar(self, item, tree):
722
"""See InventoryEntry._put_in_tar."""
723
item.type = tarfile.SYMTYPE
727
item.linkname = self.symlink_target
730
def _put_on_disk(self, fullpath, tree):
731
"""See InventoryEntry._put_on_disk."""
733
os.symlink(self.symlink_target, fullpath)
735
raise BzrError("Failed to create symlink %r -> %r, error: %s" % (fullpath, self.symlink_target, e))
737
def _read_tree_state(self, path, work_tree):
738
"""See InventoryEntry._read_tree_state."""
739
self.symlink_target = work_tree.get_symlink_target(self.file_id)
741
def _forget_tree_state(self):
742
self.symlink_target = None
744
def _unchanged(self, previous_ie):
745
"""See InventoryEntry._unchanged."""
746
compatible = super(InventoryLink, self)._unchanged(previous_ie)
747
if self.symlink_target != previous_ie.symlink_target:
252
752
class Inventory(object):
253
753
"""Inventory of versioned files in a tree.