1
# (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Maybe also keep the full path of the entry, and the children?
19
# But those depend on its position within a particular inventory, and
20
# it would be nice not to need to hold the backpointer here.
22
# TODO: Perhaps split InventoryEntry into subclasses for files,
23
# directories, etc etc.
26
# This should really be an id randomly assigned when the tree is
27
# created, but it's not for now.
37
from bzrlib.errors import BzrError, BzrCheckError
39
from bzrlib.osutils import quotefn, splitpath, joinpath, appendpath, sha_strings
40
from bzrlib.trace import mutter
41
from bzrlib.errors import NotVersionedError
44
class InventoryEntry(object):
45
"""Description of a versioned file.
47
An InventoryEntry has the following fields, which are also
48
present in the XML inventory-entry element:
53
(within the parent directory)
56
'directory' or 'file' or 'symlink'
59
file_id of the parent directory, or ROOT_ID
62
the revision_id in which this variation of this file was
66
Indicates that this file should be executable on systems
70
sha-1 of the text of the file
73
size in bytes of the text of the file
75
(reading a version 4 tree created a text_id field.)
80
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
81
InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')
82
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
83
InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')
84
>>> for j in i.iter_entries():
87
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
88
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
89
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
90
Traceback (most recent call last):
92
BzrError: inventory already contains entry with id {2323}
93
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
94
InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')
95
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
96
InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')
97
>>> i.path2id('src/wibble')
101
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
102
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
104
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
105
>>> for path, entry in i.iter_entries():
106
... print path.replace('\\\\', '/') # for win32 os.sep
107
... assert i.path2id(path)
114
>>> i.id2path('2326').replace('\\\\', '/')
115
'src/wibble/wibble.c'
118
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
119
'text_id', 'parent_id', 'children', 'executable',
120
'revision', 'symlink_target']
122
def _add_text_to_weave(self, new_lines, parents, weave_store):
123
weave_store.add_text(self.file_id, self.revision, new_lines, parents)
125
def detect_changes(self, old_entry):
126
"""Return a (text_modified, meta_modified) from this to old_entry.
128
_read_tree_state must have been called on self and old_entry prior to
129
calling detect_changes.
131
if self.kind == 'file':
132
assert self.text_sha1 != None
133
assert old_entry.text_sha1 != None
134
text_modified = (self.text_sha1 != old_entry.text_sha1)
135
meta_modified = (self.executable != old_entry.executable)
136
elif self.kind == 'symlink':
137
# FIXME: which _modified field should we use ? RBC 20051003
138
text_modified = (self.symlink_target != old_entry.symlink_target)
140
mutter(" symlink target changed")
141
meta_modified = False
143
text_modified = False
144
meta_modified = False
145
return text_modified, meta_modified
147
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
148
output_to, reverse=False):
149
"""Perform a diff from this to to_entry.
151
text_diff will be used for textual difference calculation.
153
self._read_tree_state(tree.id2path(self.file_id), tree)
155
# cannot diff from one kind to another - you must do a removal
156
# and an addif they do not match.
157
assert self.kind == to_entry.kind
158
to_entry._read_tree_state(to_tree.id2path(to_entry.file_id),
160
if self.kind == 'file':
161
from_text = tree.get_file(self.file_id).readlines()
163
to_text = to_tree.get_file(to_entry.file_id).readlines()
167
text_diff(from_label, from_text,
168
to_label, to_text, output_to)
170
text_diff(to_label, to_text,
171
from_label, from_text, output_to)
172
elif self.kind == 'symlink':
173
from_text = self.symlink_target
174
if to_entry is not None:
175
to_text = to_entry.symlink_target
180
print >>output_to, '=== target changed %r => %r' % (from_text, to_text)
183
print >>output_to, '=== target was %r' % self.symlink_target
185
print >>output_to, '=== target is %r' % self.symlink_target
188
"""Return true if the object this entry represents has textual data.
190
Note that textual data includes binary content.
192
if self.kind =='file':
197
def __init__(self, file_id, name, kind, parent_id, text_id=None):
198
"""Create an InventoryEntry
200
The filename must be a single component, relative to the
201
parent directory; it cannot be a whole path or relative name.
203
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
208
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
209
Traceback (most recent call last):
210
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
212
assert isinstance(name, basestring), name
213
if '/' in name or '\\' in name:
214
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
216
self.executable = False
218
self.text_sha1 = None
219
self.text_size = None
220
self.file_id = file_id
223
self.text_id = text_id
224
self.parent_id = parent_id
225
self.symlink_target = None
226
if kind == 'directory':
230
elif kind == 'symlink':
233
raise BzrError("unhandled entry kind %r" % kind)
235
def kind_character(self):
236
"""Return a short kind indicator useful for appending to names."""
237
if self.kind == 'directory':
239
if self.kind == 'file':
241
if self.kind == 'symlink':
243
raise RuntimeError('unreachable code')
245
known_kinds = ('file', 'directory', 'symlink', 'root_directory')
247
def sorted_children(self):
248
l = self.children.items()
253
def versionable_kind(kind):
254
return kind in ('file', 'directory', 'symlink')
256
def check(self, checker, rev_id, inv, tree):
257
if self.parent_id != None:
258
if not inv.has_id(self.parent_id):
259
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
260
% (self.parent_id, rev_id))
261
if self.kind == 'file':
262
revision = self.revision
263
t = (self.file_id, revision)
264
if t in checker.checked_texts:
265
prev_sha = checker.checked_texts[t]
266
if prev_sha != self.text_sha1:
267
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
268
(self.file_id, rev_id))
270
checker.repeated_text_cnt += 1
272
mutter('check version {%s} of {%s}', rev_id, self.file_id)
273
file_lines = tree.get_file_lines(self.file_id)
274
checker.checked_text_cnt += 1
275
if self.text_size != sum(map(len, file_lines)):
276
raise BzrCheckError('text {%s} wrong size' % self.text_id)
277
if self.text_sha1 != sha_strings(file_lines):
278
raise BzrCheckError('text {%s} wrong sha1' % self.text_id)
279
checker.checked_texts[t] = self.text_sha1
280
elif self.kind == 'directory':
281
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
282
raise BzrCheckError('directory {%s} has text in revision {%s}'
283
% (self.file_id, rev_id))
284
elif self.kind == 'root_directory':
286
elif self.kind == 'symlink':
287
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
288
raise BzrCheckError('symlink {%s} has text in revision {%s}'
289
% (self.file_id, rev_id))
290
if self.symlink_target == None:
291
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
292
% (self.file_id, rev_id))
294
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
299
other = InventoryEntry(self.file_id, self.name, self.kind,
301
other.executable = self.executable
302
other.text_id = self.text_id
303
other.text_sha1 = self.text_sha1
304
other.text_size = self.text_size
305
other.symlink_target = self.symlink_target
306
other.revision = self.revision
307
# note that children are *not* copied; they're pulled across when
311
def _get_snapshot_change(self, previous_entries):
312
if len(previous_entries) > 1:
314
elif len(previous_entries) == 0:
317
return 'modified/renamed/reparented'
320
return ("%s(%r, %r, kind=%r, parent_id=%r)"
321
% (self.__class__.__name__,
327
def snapshot(self, revision, path, previous_entries, work_tree,
329
"""Make a snapshot of this entry.
331
This means that all its fields are populated, that it has its
332
text stored in the text store or weave.
334
mutter('new parents of %s are %r', path, previous_entries)
335
self._read_tree_state(path, work_tree)
336
if len(previous_entries) == 1:
337
# cannot be unchanged unless there is only one parent file rev.
338
parent_ie = previous_entries.values()[0]
339
if self._unchanged(path, parent_ie, work_tree):
340
mutter("found unchanged entry")
341
self.revision = parent_ie.revision
343
mutter('new revision for {%s}', self.file_id)
344
self.revision = revision
345
change = self._get_snapshot_change(previous_entries)
346
if self.kind != 'file':
348
self._snapshot_text(previous_entries, work_tree, weave_store)
351
def _snapshot_text(self, file_parents, work_tree, weave_store):
352
mutter('storing file {%s} in revision {%s}',
353
self.file_id, self.revision)
354
# special case to avoid diffing on renames or
356
if (len(file_parents) == 1
357
and self.text_sha1 == file_parents.values()[0].text_sha1
358
and self.text_size == file_parents.values()[0].text_size):
359
previous_ie = file_parents.values()[0]
360
weave_store.add_identical_text(
361
self.file_id, previous_ie.revision,
362
self.revision, file_parents)
364
new_lines = work_tree.get_file(self.file_id).readlines()
365
self._add_text_to_weave(new_lines, file_parents, weave_store)
366
self.text_sha1 = sha_strings(new_lines)
367
self.text_size = sum(map(len, new_lines))
369
def __eq__(self, other):
370
if not isinstance(other, InventoryEntry):
371
return NotImplemented
373
return ((self.file_id == other.file_id)
374
and (self.name == other.name)
375
and (other.symlink_target == self.symlink_target)
376
and (self.text_sha1 == other.text_sha1)
377
and (self.text_size == other.text_size)
378
and (self.text_id == other.text_id)
379
and (self.parent_id == other.parent_id)
380
and (self.kind == other.kind)
381
and (self.revision == other.revision)
382
and (self.executable == other.executable)
385
def __ne__(self, other):
386
return not (self == other)
389
raise ValueError('not hashable')
391
def _unchanged(self, path, previous_ie, work_tree):
393
# different inv parent
394
if previous_ie.parent_id != self.parent_id:
397
elif previous_ie.name != self.name:
399
if self.kind == 'symlink':
400
if self.symlink_target != previous_ie.symlink_target:
402
if self.kind == 'file':
403
if self.text_sha1 != previous_ie.text_sha1:
406
# FIXME: 20050930 probe for the text size when getting sha1
407
# in _read_tree_state
408
self.text_size = previous_ie.text_size
411
def _read_tree_state(self, path, work_tree):
412
if self.kind == 'symlink':
413
self.symlink_target = work_tree.get_symlink_target(self.file_id)
414
if self.kind == 'file':
415
self.text_sha1 = work_tree.get_file_sha1(self.file_id)
416
self.executable = work_tree.is_executable(self.file_id)
419
class RootEntry(InventoryEntry):
420
def __init__(self, file_id):
421
self.file_id = file_id
423
self.kind = 'root_directory'
424
self.parent_id = None
427
def __eq__(self, other):
428
if not isinstance(other, RootEntry):
429
return NotImplemented
431
return (self.file_id == other.file_id) \
432
and (self.children == other.children)
436
class Inventory(object):
437
"""Inventory of versioned files in a tree.
439
This describes which file_id is present at each point in the tree,
440
and possibly the SHA-1 or other information about the file.
441
Entries can be looked up either by path or by file_id.
443
The inventory represents a typical unix file tree, with
444
directories containing files and subdirectories. We never store
445
the full path to a file, because renaming a directory implicitly
446
moves all of its contents. This class internally maintains a
447
lookup tree that allows the children under a directory to be
450
InventoryEntry objects must not be modified after they are
451
inserted, other than through the Inventory API.
453
>>> inv = Inventory()
454
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
455
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT')
456
>>> inv['123-123'].name
459
May be treated as an iterator or set to look up file ids:
461
>>> bool(inv.path2id('hello.c'))
466
May also look up by name:
468
>>> [x[0] for x in inv.iter_entries()]
470
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
471
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
472
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')
474
def __init__(self, root_id=ROOT_ID):
475
"""Create or read an inventory.
477
If a working directory is specified, the inventory is read
478
from there. If the file is specified, read from that. If not,
479
the inventory is created empty.
481
The inventory is created with a default root directory, with
484
# We are letting Branch.initialize() create a unique inventory
485
# root id. Rather than generating a random one here.
487
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
488
self.root = RootEntry(root_id)
489
self._byid = {self.root.file_id: self.root}
493
other = Inventory(self.root.file_id)
494
# copy recursively so we know directories will be added before
495
# their children. There are more efficient ways than this...
496
for path, entry in self.iter_entries():
497
if entry == self.root:
499
other.add(entry.copy())
504
return iter(self._byid)
508
"""Returns number of entries."""
509
return len(self._byid)
512
def iter_entries(self, from_dir=None):
513
"""Return (path, entry) pairs, in order by name."""
517
elif isinstance(from_dir, basestring):
518
from_dir = self._byid[from_dir]
520
kids = from_dir.children.items()
522
for name, ie in kids:
524
if ie.kind == 'directory':
525
for cn, cie in self.iter_entries(from_dir=ie.file_id):
526
yield os.path.join(name, cn), cie
530
"""Return list of (path, ie) for all entries except the root.
532
This may be faster than iter_entries.
535
def descend(dir_ie, dir_path):
536
kids = dir_ie.children.items()
538
for name, ie in kids:
539
child_path = os.path.join(dir_path, name)
540
accum.append((child_path, ie))
541
if ie.kind == 'directory':
542
descend(ie, child_path)
544
descend(self.root, '')
548
def directories(self):
549
"""Return (path, entry) pairs for all directories, including the root.
552
def descend(parent_ie, parent_path):
553
accum.append((parent_path, parent_ie))
555
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
558
for name, child_ie in kids:
559
child_path = os.path.join(parent_path, name)
560
descend(child_ie, child_path)
561
descend(self.root, '')
566
def __contains__(self, file_id):
567
"""True if this entry contains a file with given id.
569
>>> inv = Inventory()
570
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
571
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
577
return file_id in self._byid
580
def __getitem__(self, file_id):
581
"""Return the entry for given file_id.
583
>>> inv = Inventory()
584
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
585
InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')
586
>>> inv['123123'].name
590
return self._byid[file_id]
593
raise BzrError("can't look up file_id None")
595
raise BzrError("file_id {%s} not in inventory" % file_id)
598
def get_file_kind(self, file_id):
599
return self._byid[file_id].kind
601
def get_child(self, parent_id, filename):
602
return self[parent_id].children.get(filename)
605
def add(self, entry):
606
"""Add entry to inventory.
608
To add a file to a branch ready to be committed, use Branch.add,
611
Returns the new entry object.
613
if entry.file_id in self._byid:
614
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
616
if entry.parent_id == ROOT_ID or entry.parent_id is None:
617
entry.parent_id = self.root.file_id
620
parent = self._byid[entry.parent_id]
622
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
624
if parent.children.has_key(entry.name):
625
raise BzrError("%s is already versioned" %
626
appendpath(self.id2path(parent.file_id), entry.name))
628
self._byid[entry.file_id] = entry
629
parent.children[entry.name] = entry
633
def add_path(self, relpath, kind, file_id=None):
634
"""Add entry from a path.
636
The immediate parent must already be versioned.
638
Returns the new entry object."""
639
from bzrlib.branch import gen_file_id
641
parts = bzrlib.osutils.splitpath(relpath)
643
raise BzrError("cannot re-add root of inventory")
646
file_id = gen_file_id(relpath)
648
parent_path = parts[:-1]
649
parent_id = self.path2id(parent_path)
650
if parent_id == None:
651
raise NotVersionedError(parent_path)
653
ie = InventoryEntry(file_id, parts[-1],
654
kind=kind, parent_id=parent_id)
658
def __delitem__(self, file_id):
659
"""Remove entry by id.
661
>>> inv = Inventory()
662
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
663
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
672
assert self[ie.parent_id].children[ie.name] == ie
674
# TODO: Test deleting all children; maybe hoist to a separate
676
if ie.kind == 'directory':
677
for cie in ie.children.values():
678
del self[cie.file_id]
681
del self._byid[file_id]
682
del self[ie.parent_id].children[ie.name]
685
def __eq__(self, other):
686
"""Compare two sets by comparing their contents.
692
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
693
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
696
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
697
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
701
if not isinstance(other, Inventory):
702
return NotImplemented
704
if len(self._byid) != len(other._byid):
705
# shortcut: obviously not the same
708
return self._byid == other._byid
711
def __ne__(self, other):
712
return not self.__eq__(other)
716
raise ValueError('not hashable')
719
def get_idpath(self, file_id):
720
"""Return a list of file_ids for the path to an entry.
722
The list contains one element for each directory followed by
723
the id of the file itself. So the length of the returned list
724
is equal to the depth of the file in the tree, counting the
725
root directory as depth 1.
728
while file_id != None:
730
ie = self._byid[file_id]
732
raise BzrError("file_id {%s} not found in inventory" % file_id)
733
p.insert(0, ie.file_id)
734
file_id = ie.parent_id
738
def id2path(self, file_id):
739
"""Return as a list the path to file_id."""
741
# get all names, skipping root
742
p = [self._byid[fid].name for fid in self.get_idpath(file_id)[1:]]
743
return os.sep.join(p)
747
def path2id(self, name):
748
"""Walk down through directories to return entry of last component.
750
names may be either a list of path components, or a single
751
string, in which case it is automatically split.
753
This returns the entry of the last component in the path,
754
which may be either a file or a directory.
756
Returns None iff the path is not found.
758
if isinstance(name, types.StringTypes):
759
name = splitpath(name)
761
mutter("lookup path %r" % name)
766
cie = parent.children[f]
768
assert cie.parent_id == parent.file_id
774
return parent.file_id
777
def has_filename(self, names):
778
return bool(self.path2id(names))
781
def has_id(self, file_id):
782
return self._byid.has_key(file_id)
785
def rename(self, file_id, new_parent_id, new_name):
786
"""Move a file within the inventory.
788
This can change either the name, or the parent, or both.
790
This does not move the working file."""
791
if not is_valid_name(new_name):
792
raise BzrError("not an acceptable filename: %r" % new_name)
794
new_parent = self._byid[new_parent_id]
795
if new_name in new_parent.children:
796
raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
798
new_parent_idpath = self.get_idpath(new_parent_id)
799
if file_id in new_parent_idpath:
800
raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
801
% (self.id2path(file_id), self.id2path(new_parent_id)))
803
file_ie = self._byid[file_id]
804
old_parent = self._byid[file_ie.parent_id]
806
# TODO: Don't leave things messed up if this fails
808
del old_parent.children[file_ie.name]
809
new_parent.children[new_name] = file_ie
811
file_ie.name = new_name
812
file_ie.parent_id = new_parent_id
819
def is_valid_name(name):
822
_NAME_RE = re.compile(r'^[^/\\]+$')
824
return bool(_NAME_RE.match(name))