1
# (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Maybe also keep the full path of the entry, and the children?
19
# But those depend on its position within a particular inventory, and
20
# it would be nice not to need to hold the backpointer here.
22
# TODO: Perhaps split InventoryEntry into subclasses for files,
23
# directories, etc etc.
26
# This should really be an id randomly assigned when the tree is
27
# created, but it's not for now.
38
from bzrlib.errors import BzrError, BzrCheckError
40
from bzrlib.osutils import (pumpfile, quotefn, splitpath, joinpath,
41
appendpath, sha_strings)
42
from bzrlib.trace import mutter
43
from bzrlib.errors import NotVersionedError
46
class InventoryEntry(object):
47
"""Description of a versioned file.
49
An InventoryEntry has the following fields, which are also
50
present in the XML inventory-entry element:
55
(within the parent directory)
58
'directory' or 'file' or 'symlink'
61
file_id of the parent directory, or ROOT_ID
64
the revision_id in which this variation of this file was
68
Indicates that this file should be executable on systems
72
sha-1 of the text of the file
75
size in bytes of the text of the file
77
(reading a version 4 tree created a text_id field.)
82
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
83
InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')
84
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
85
InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')
86
>>> for j in i.iter_entries():
89
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
90
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
91
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
92
Traceback (most recent call last):
94
BzrError: inventory already contains entry with id {2323}
95
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
96
InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')
97
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
98
InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')
99
>>> i.path2id('src/wibble')
103
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
104
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
106
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
107
>>> for path, entry in i.iter_entries():
108
... print path.replace('\\\\', '/') # for win32 os.sep
109
... assert i.path2id(path)
116
>>> i.id2path('2326').replace('\\\\', '/')
117
'src/wibble/wibble.c'
120
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
121
'text_id', 'parent_id', 'children', 'executable',
122
'revision', 'symlink_target']
124
def _add_text_to_weave(self, new_lines, parents, weave_store):
125
weave_store.add_text(self.file_id, self.revision, new_lines, parents)
127
def detect_changes(self, old_entry):
128
"""Return a (text_modified, meta_modified) from this to old_entry.
130
_read_tree_state must have been called on self and old_entry prior to
131
calling detect_changes.
133
if self.kind == 'file':
134
assert self.text_sha1 != None
135
assert old_entry.text_sha1 != None
136
text_modified = (self.text_sha1 != old_entry.text_sha1)
137
meta_modified = (self.executable != old_entry.executable)
138
elif self.kind == 'symlink':
139
# FIXME: which _modified field should we use ? RBC 20051003
140
text_modified = (self.symlink_target != old_entry.symlink_target)
142
mutter(" symlink target changed")
143
meta_modified = False
145
text_modified = False
146
meta_modified = False
147
return text_modified, meta_modified
149
def diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree,
150
output_to, reverse=False):
151
"""Perform a diff from this to to_entry.
153
text_diff will be used for textual difference calculation.
155
self._read_tree_state(tree.id2path(self.file_id), tree)
157
# cannot diff from one kind to another - you must do a removal
158
# and an addif they do not match.
159
assert self.kind == to_entry.kind
160
to_entry._read_tree_state(to_tree.id2path(to_entry.file_id),
162
if self.kind == 'file':
163
from_text = tree.get_file(self.file_id).readlines()
165
to_text = to_tree.get_file(to_entry.file_id).readlines()
169
text_diff(from_label, from_text,
170
to_label, to_text, output_to)
172
text_diff(to_label, to_text,
173
from_label, from_text, output_to)
174
elif self.kind == 'symlink':
175
from_text = self.symlink_target
176
if to_entry is not None:
177
to_text = to_entry.symlink_target
182
print >>output_to, '=== target changed %r => %r' % (from_text, to_text)
185
print >>output_to, '=== target was %r' % self.symlink_target
187
print >>output_to, '=== target is %r' % self.symlink_target
189
def get_tar_item(self, root, dp, now, tree):
190
item = tarfile.TarInfo(os.path.join(root, dp))
191
# TODO: would be cool to actually set it to the timestamp of the
192
# revision it was last changed
194
if self.kind == 'directory':
195
item.type = tarfile.DIRTYPE
200
elif self.kind == 'file':
201
item.type = tarfile.REGTYPE
202
fileobj = tree.get_file(self.file_id)
203
item.size = self.text_size
204
if tree.is_executable(self.file_id):
209
raise BzrError("don't know how to export {%s} of kind %r" %
210
(self.file_id, self.kind))
214
"""Return true if the object this entry represents has textual data.
216
Note that textual data includes binary content.
218
if self.kind =='file':
223
def __init__(self, file_id, name, kind, parent_id, text_id=None):
224
"""Create an InventoryEntry
226
The filename must be a single component, relative to the
227
parent directory; it cannot be a whole path or relative name.
229
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
234
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
235
Traceback (most recent call last):
236
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
238
assert isinstance(name, basestring), name
239
if '/' in name or '\\' in name:
240
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
242
self.executable = False
244
self.text_sha1 = None
245
self.text_size = None
246
self.file_id = file_id
249
self.text_id = text_id
250
self.parent_id = parent_id
251
self.symlink_target = None
252
if kind == 'directory':
256
elif kind == 'symlink':
259
raise BzrError("unhandled entry kind %r" % kind)
261
def kind_character(self):
262
"""Return a short kind indicator useful for appending to names."""
263
if self.kind == 'directory':
265
if self.kind == 'file':
267
if self.kind == 'symlink':
269
raise RuntimeError('unreachable code')
271
known_kinds = ('file', 'directory', 'symlink', 'root_directory')
273
def put_on_disk(self, dest, dp, tree):
274
"""Create a representation of self on disk in the prefix dest."""
275
fullpath = appendpath(dest, dp)
276
if self.kind == 'directory':
278
elif self.kind == 'file':
279
pumpfile(tree.get_file(self.file_id), file(fullpath, 'wb'))
280
if tree.is_executable(self.file_id):
281
os.chmod(fullpath, 0755)
282
elif self.kind == 'symlink':
284
os.symlink(self.symlink_target, fullpath)
286
raise BzrError("Failed to create symlink %r -> %r, error: %s" % (fullpath, self.symlink_target, e))
288
raise BzrError("don't know how to export {%s} of kind %r" % (self.file_id, self.kind))
289
mutter(" export {%s} kind %s to %s" % (self.file_id, self.kind, fullpath))
291
def sorted_children(self):
292
l = self.children.items()
297
def versionable_kind(kind):
298
return kind in ('file', 'directory', 'symlink')
300
def check(self, checker, rev_id, inv, tree):
301
if self.parent_id != None:
302
if not inv.has_id(self.parent_id):
303
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
304
% (self.parent_id, rev_id))
305
if self.kind == 'file':
306
revision = self.revision
307
t = (self.file_id, revision)
308
if t in checker.checked_texts:
309
prev_sha = checker.checked_texts[t]
310
if prev_sha != self.text_sha1:
311
raise BzrCheckError('mismatched sha1 on {%s} in {%s}' %
312
(self.file_id, rev_id))
314
checker.repeated_text_cnt += 1
316
mutter('check version {%s} of {%s}', rev_id, self.file_id)
317
file_lines = tree.get_file_lines(self.file_id)
318
checker.checked_text_cnt += 1
319
if self.text_size != sum(map(len, file_lines)):
320
raise BzrCheckError('text {%s} wrong size' % self.text_id)
321
if self.text_sha1 != sha_strings(file_lines):
322
raise BzrCheckError('text {%s} wrong sha1' % self.text_id)
323
checker.checked_texts[t] = self.text_sha1
324
elif self.kind == 'directory':
325
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
326
raise BzrCheckError('directory {%s} has text in revision {%s}'
327
% (self.file_id, rev_id))
328
elif self.kind == 'root_directory':
330
elif self.kind == 'symlink':
331
if self.text_sha1 != None or self.text_size != None or self.text_id != None:
332
raise BzrCheckError('symlink {%s} has text in revision {%s}'
333
% (self.file_id, rev_id))
334
if self.symlink_target == None:
335
raise BzrCheckError('symlink {%s} has no target in revision {%s}'
336
% (self.file_id, rev_id))
338
raise BzrCheckError('unknown entry kind %r in revision {%s}' %
343
other = InventoryEntry(self.file_id, self.name, self.kind,
345
other.executable = self.executable
346
other.text_id = self.text_id
347
other.text_sha1 = self.text_sha1
348
other.text_size = self.text_size
349
other.symlink_target = self.symlink_target
350
other.revision = self.revision
351
# note that children are *not* copied; they're pulled across when
355
def _get_snapshot_change(self, previous_entries):
356
if len(previous_entries) > 1:
358
elif len(previous_entries) == 0:
361
return 'modified/renamed/reparented'
364
return ("%s(%r, %r, kind=%r, parent_id=%r)"
365
% (self.__class__.__name__,
371
def snapshot(self, revision, path, previous_entries, work_tree,
373
"""Make a snapshot of this entry.
375
This means that all its fields are populated, that it has its
376
text stored in the text store or weave.
378
mutter('new parents of %s are %r', path, previous_entries)
379
self._read_tree_state(path, work_tree)
380
if len(previous_entries) == 1:
381
# cannot be unchanged unless there is only one parent file rev.
382
parent_ie = previous_entries.values()[0]
383
if self._unchanged(path, parent_ie, work_tree):
384
mutter("found unchanged entry")
385
self.revision = parent_ie.revision
387
mutter('new revision for {%s}', self.file_id)
388
self.revision = revision
389
change = self._get_snapshot_change(previous_entries)
390
if self.kind != 'file':
392
self._snapshot_text(previous_entries, work_tree, weave_store)
395
def _snapshot_text(self, file_parents, work_tree, weave_store):
396
mutter('storing file {%s} in revision {%s}',
397
self.file_id, self.revision)
398
# special case to avoid diffing on renames or
400
if (len(file_parents) == 1
401
and self.text_sha1 == file_parents.values()[0].text_sha1
402
and self.text_size == file_parents.values()[0].text_size):
403
previous_ie = file_parents.values()[0]
404
weave_store.add_identical_text(
405
self.file_id, previous_ie.revision,
406
self.revision, file_parents)
408
new_lines = work_tree.get_file(self.file_id).readlines()
409
self._add_text_to_weave(new_lines, file_parents, weave_store)
410
self.text_sha1 = sha_strings(new_lines)
411
self.text_size = sum(map(len, new_lines))
413
def __eq__(self, other):
414
if not isinstance(other, InventoryEntry):
415
return NotImplemented
417
return ((self.file_id == other.file_id)
418
and (self.name == other.name)
419
and (other.symlink_target == self.symlink_target)
420
and (self.text_sha1 == other.text_sha1)
421
and (self.text_size == other.text_size)
422
and (self.text_id == other.text_id)
423
and (self.parent_id == other.parent_id)
424
and (self.kind == other.kind)
425
and (self.revision == other.revision)
426
and (self.executable == other.executable)
429
def __ne__(self, other):
430
return not (self == other)
433
raise ValueError('not hashable')
435
def _unchanged(self, path, previous_ie, work_tree):
437
# different inv parent
438
if previous_ie.parent_id != self.parent_id:
441
elif previous_ie.name != self.name:
443
if self.kind == 'symlink':
444
if self.symlink_target != previous_ie.symlink_target:
446
if self.kind == 'file':
447
if self.text_sha1 != previous_ie.text_sha1:
450
# FIXME: 20050930 probe for the text size when getting sha1
451
# in _read_tree_state
452
self.text_size = previous_ie.text_size
455
def _read_tree_state(self, path, work_tree):
456
if self.kind == 'symlink':
457
self.symlink_target = work_tree.get_symlink_target(self.file_id)
458
if self.kind == 'file':
459
self.text_sha1 = work_tree.get_file_sha1(self.file_id)
460
self.executable = work_tree.is_executable(self.file_id)
463
class RootEntry(InventoryEntry):
464
def __init__(self, file_id):
465
self.file_id = file_id
467
self.kind = 'root_directory'
468
self.parent_id = None
471
def __eq__(self, other):
472
if not isinstance(other, RootEntry):
473
return NotImplemented
475
return (self.file_id == other.file_id) \
476
and (self.children == other.children)
480
class Inventory(object):
481
"""Inventory of versioned files in a tree.
483
This describes which file_id is present at each point in the tree,
484
and possibly the SHA-1 or other information about the file.
485
Entries can be looked up either by path or by file_id.
487
The inventory represents a typical unix file tree, with
488
directories containing files and subdirectories. We never store
489
the full path to a file, because renaming a directory implicitly
490
moves all of its contents. This class internally maintains a
491
lookup tree that allows the children under a directory to be
494
InventoryEntry objects must not be modified after they are
495
inserted, other than through the Inventory API.
497
>>> inv = Inventory()
498
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
499
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT')
500
>>> inv['123-123'].name
503
May be treated as an iterator or set to look up file ids:
505
>>> bool(inv.path2id('hello.c'))
510
May also look up by name:
512
>>> [x[0] for x in inv.iter_entries()]
514
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
515
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
516
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')
518
def __init__(self, root_id=ROOT_ID):
519
"""Create or read an inventory.
521
If a working directory is specified, the inventory is read
522
from there. If the file is specified, read from that. If not,
523
the inventory is created empty.
525
The inventory is created with a default root directory, with
528
# We are letting Branch.initialize() create a unique inventory
529
# root id. Rather than generating a random one here.
531
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
532
self.root = RootEntry(root_id)
533
self._byid = {self.root.file_id: self.root}
537
other = Inventory(self.root.file_id)
538
# copy recursively so we know directories will be added before
539
# their children. There are more efficient ways than this...
540
for path, entry in self.iter_entries():
541
if entry == self.root:
543
other.add(entry.copy())
548
return iter(self._byid)
552
"""Returns number of entries."""
553
return len(self._byid)
556
def iter_entries(self, from_dir=None):
557
"""Return (path, entry) pairs, in order by name."""
561
elif isinstance(from_dir, basestring):
562
from_dir = self._byid[from_dir]
564
kids = from_dir.children.items()
566
for name, ie in kids:
568
if ie.kind == 'directory':
569
for cn, cie in self.iter_entries(from_dir=ie.file_id):
570
yield os.path.join(name, cn), cie
574
"""Return list of (path, ie) for all entries except the root.
576
This may be faster than iter_entries.
579
def descend(dir_ie, dir_path):
580
kids = dir_ie.children.items()
582
for name, ie in kids:
583
child_path = os.path.join(dir_path, name)
584
accum.append((child_path, ie))
585
if ie.kind == 'directory':
586
descend(ie, child_path)
588
descend(self.root, '')
592
def directories(self):
593
"""Return (path, entry) pairs for all directories, including the root.
596
def descend(parent_ie, parent_path):
597
accum.append((parent_path, parent_ie))
599
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
602
for name, child_ie in kids:
603
child_path = os.path.join(parent_path, name)
604
descend(child_ie, child_path)
605
descend(self.root, '')
610
def __contains__(self, file_id):
611
"""True if this entry contains a file with given id.
613
>>> inv = Inventory()
614
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
615
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
621
return file_id in self._byid
624
def __getitem__(self, file_id):
625
"""Return the entry for given file_id.
627
>>> inv = Inventory()
628
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
629
InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')
630
>>> inv['123123'].name
634
return self._byid[file_id]
637
raise BzrError("can't look up file_id None")
639
raise BzrError("file_id {%s} not in inventory" % file_id)
642
def get_file_kind(self, file_id):
643
return self._byid[file_id].kind
645
def get_child(self, parent_id, filename):
646
return self[parent_id].children.get(filename)
649
def add(self, entry):
650
"""Add entry to inventory.
652
To add a file to a branch ready to be committed, use Branch.add,
655
Returns the new entry object.
657
if entry.file_id in self._byid:
658
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
660
if entry.parent_id == ROOT_ID or entry.parent_id is None:
661
entry.parent_id = self.root.file_id
664
parent = self._byid[entry.parent_id]
666
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
668
if parent.children.has_key(entry.name):
669
raise BzrError("%s is already versioned" %
670
appendpath(self.id2path(parent.file_id), entry.name))
672
self._byid[entry.file_id] = entry
673
parent.children[entry.name] = entry
677
def add_path(self, relpath, kind, file_id=None):
678
"""Add entry from a path.
680
The immediate parent must already be versioned.
682
Returns the new entry object."""
683
from bzrlib.branch import gen_file_id
685
parts = bzrlib.osutils.splitpath(relpath)
687
raise BzrError("cannot re-add root of inventory")
690
file_id = gen_file_id(relpath)
692
parent_path = parts[:-1]
693
parent_id = self.path2id(parent_path)
694
if parent_id == None:
695
raise NotVersionedError(parent_path)
697
ie = InventoryEntry(file_id, parts[-1],
698
kind=kind, parent_id=parent_id)
702
def __delitem__(self, file_id):
703
"""Remove entry by id.
705
>>> inv = Inventory()
706
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
707
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
716
assert self[ie.parent_id].children[ie.name] == ie
718
# TODO: Test deleting all children; maybe hoist to a separate
720
if ie.kind == 'directory':
721
for cie in ie.children.values():
722
del self[cie.file_id]
725
del self._byid[file_id]
726
del self[ie.parent_id].children[ie.name]
729
def __eq__(self, other):
730
"""Compare two sets by comparing their contents.
736
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
737
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
740
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
741
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
745
if not isinstance(other, Inventory):
746
return NotImplemented
748
if len(self._byid) != len(other._byid):
749
# shortcut: obviously not the same
752
return self._byid == other._byid
755
def __ne__(self, other):
756
return not self.__eq__(other)
760
raise ValueError('not hashable')
763
def get_idpath(self, file_id):
764
"""Return a list of file_ids for the path to an entry.
766
The list contains one element for each directory followed by
767
the id of the file itself. So the length of the returned list
768
is equal to the depth of the file in the tree, counting the
769
root directory as depth 1.
772
while file_id != None:
774
ie = self._byid[file_id]
776
raise BzrError("file_id {%s} not found in inventory" % file_id)
777
p.insert(0, ie.file_id)
778
file_id = ie.parent_id
782
def id2path(self, file_id):
783
"""Return as a list the path to file_id."""
785
# get all names, skipping root
786
p = [self._byid[fid].name for fid in self.get_idpath(file_id)[1:]]
787
return os.sep.join(p)
791
def path2id(self, name):
792
"""Walk down through directories to return entry of last component.
794
names may be either a list of path components, or a single
795
string, in which case it is automatically split.
797
This returns the entry of the last component in the path,
798
which may be either a file or a directory.
800
Returns None iff the path is not found.
802
if isinstance(name, types.StringTypes):
803
name = splitpath(name)
805
mutter("lookup path %r" % name)
810
cie = parent.children[f]
812
assert cie.parent_id == parent.file_id
818
return parent.file_id
821
def has_filename(self, names):
822
return bool(self.path2id(names))
825
def has_id(self, file_id):
826
return self._byid.has_key(file_id)
829
def rename(self, file_id, new_parent_id, new_name):
830
"""Move a file within the inventory.
832
This can change either the name, or the parent, or both.
834
This does not move the working file."""
835
if not is_valid_name(new_name):
836
raise BzrError("not an acceptable filename: %r" % new_name)
838
new_parent = self._byid[new_parent_id]
839
if new_name in new_parent.children:
840
raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
842
new_parent_idpath = self.get_idpath(new_parent_id)
843
if file_id in new_parent_idpath:
844
raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
845
% (self.id2path(file_id), self.id2path(new_parent_id)))
847
file_ie = self._byid[file_id]
848
old_parent = self._byid[file_ie.parent_id]
850
# TODO: Don't leave things messed up if this fails
852
del old_parent.children[file_ie.name]
853
new_parent.children[new_name] = file_ie
855
file_ie.name = new_name
856
file_ie.parent_id = new_parent_id
863
def is_valid_name(name):
866
_NAME_RE = re.compile(r'^[^/\\]+$')
868
return bool(_NAME_RE.match(name))