1
# (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Maybe also keep the full path of the entry, and the children?
19
# But those depend on its position within a particular inventory, and
20
# it would be nice not to need to hold the backpointer here.
22
# TODO: Perhaps split InventoryEntry into subclasses for files,
23
# directories, etc etc.
26
# This should really be an id randomly assigned when the tree is
27
# created, but it's not for now.
31
import sys, os.path, types, re
34
from bzrlib.errors import BzrError, BzrCheckError
36
from bzrlib.osutils import uuid, quotefn, splitpath, joinpath, appendpath
37
from bzrlib.trace import mutter
38
from bzrlib.errors import NotVersionedError
41
class InventoryEntry(object):
42
"""Description of a versioned file.
44
An InventoryEntry has the following fields, which are also
45
present in the XML inventory-entry element:
50
(within the parent directory)
56
file_id of the parent directory, or ROOT_ID
59
the revision_id in which the name or parent of this file was
63
sha-1 of the text of the file
66
size in bytes of the text of the file
69
the revision_id in which the text of this file was introduced
71
(reading a version 4 tree created a text_id field.)
76
>>> i.add(InventoryEntry('123', 'src', 'directory', ROOT_ID))
77
InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT')
78
>>> i.add(InventoryEntry('2323', 'hello.c', 'file', parent_id='123'))
79
InventoryEntry('2323', 'hello.c', kind='file', parent_id='123')
80
>>> for j in i.iter_entries():
83
('src', InventoryEntry('123', 'src', kind='directory', parent_id='TREE_ROOT'))
84
('src/hello.c', InventoryEntry('2323', 'hello.c', kind='file', parent_id='123'))
85
>>> i.add(InventoryEntry('2323', 'bye.c', 'file', '123'))
86
Traceback (most recent call last):
88
BzrError: inventory already contains entry with id {2323}
89
>>> i.add(InventoryEntry('2324', 'bye.c', 'file', '123'))
90
InventoryEntry('2324', 'bye.c', kind='file', parent_id='123')
91
>>> i.add(InventoryEntry('2325', 'wibble', 'directory', '123'))
92
InventoryEntry('2325', 'wibble', kind='directory', parent_id='123')
93
>>> i.path2id('src/wibble')
97
>>> i.add(InventoryEntry('2326', 'wibble.c', 'file', '2325'))
98
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
100
InventoryEntry('2326', 'wibble.c', kind='file', parent_id='2325')
101
>>> for j in i.iter_entries():
103
... assert i.path2id(j[0])
110
>>> i.id2path('2326')
111
'src/wibble/wibble.c'
114
__slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
115
'text_id', 'parent_id', 'children',
116
'text_version', 'entry_version', ]
119
def __init__(self, file_id, name, kind, parent_id, text_id=None):
120
"""Create an InventoryEntry
122
The filename must be a single component, relative to the
123
parent directory; it cannot be a whole path or relative name.
125
>>> e = InventoryEntry('123', 'hello.c', 'file', ROOT_ID)
130
>>> e = InventoryEntry('123', 'src/hello.c', 'file', ROOT_ID)
131
Traceback (most recent call last):
132
BzrCheckError: InventoryEntry name 'src/hello.c' is invalid
134
assert isinstance(name, basestring), name
135
if '/' in name or '\\' in name:
136
raise BzrCheckError('InventoryEntry name %r is invalid' % name)
138
self.text_version = None
139
self.entry_version = None
140
self.text_sha1 = None
141
self.text_size = None
142
self.file_id = file_id
145
self.text_id = text_id
146
self.parent_id = parent_id
147
if kind == 'directory':
152
raise BzrError("unhandled entry kind %r" % kind)
156
def sorted_children(self):
157
l = self.children.items()
163
other = InventoryEntry(self.file_id, self.name, self.kind,
165
other.text_id = self.text_id
166
other.text_sha1 = self.text_sha1
167
other.text_size = self.text_size
168
# note that children are *not* copied; they're pulled across when
174
return ("%s(%r, %r, kind=%r, parent_id=%r)"
175
% (self.__class__.__name__,
182
def __eq__(self, other):
183
if not isinstance(other, InventoryEntry):
184
return NotImplemented
186
return (self.file_id == other.file_id) \
187
and (self.name == other.name) \
188
and (self.text_sha1 == other.text_sha1) \
189
and (self.text_size == other.text_size) \
190
and (self.text_id == other.text_id) \
191
and (self.parent_id == other.parent_id) \
192
and (self.kind == other.kind) \
193
and (self.text_version == other.text_version) \
194
and (self.entry_version == other.entry_version)
197
def __ne__(self, other):
198
return not (self == other)
201
raise ValueError('not hashable')
205
class RootEntry(InventoryEntry):
206
def __init__(self, file_id):
207
self.file_id = file_id
209
self.kind = 'root_directory'
210
self.parent_id = None
213
def __eq__(self, other):
214
if not isinstance(other, RootEntry):
215
return NotImplemented
217
return (self.file_id == other.file_id) \
218
and (self.children == other.children)
222
class Inventory(object):
223
"""Inventory of versioned files in a tree.
225
This describes which file_id is present at each point in the tree,
226
and possibly the SHA-1 or other information about the file.
227
Entries can be looked up either by path or by file_id.
229
The inventory represents a typical unix file tree, with
230
directories containing files and subdirectories. We never store
231
the full path to a file, because renaming a directory implicitly
232
moves all of its contents. This class internally maintains a
233
lookup tree that allows the children under a directory to be
236
InventoryEntry objects must not be modified after they are
237
inserted, other than through the Inventory API.
239
>>> inv = Inventory()
240
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
241
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT')
242
>>> inv['123-123'].name
245
May be treated as an iterator or set to look up file ids:
247
>>> bool(inv.path2id('hello.c'))
252
May also look up by name:
254
>>> [x[0] for x in inv.iter_entries()]
256
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
257
>>> inv.add(InventoryEntry('123-123', 'hello.c', 'file', ROOT_ID))
258
InventoryEntry('123-123', 'hello.c', kind='file', parent_id='TREE_ROOT-12345678-12345678')
260
def __init__(self, root_id=ROOT_ID):
261
"""Create or read an inventory.
263
If a working directory is specified, the inventory is read
264
from there. If the file is specified, read from that. If not,
265
the inventory is created empty.
267
The inventory is created with a default root directory, with
270
# We are letting Branch(init=True) create a unique inventory
271
# root id. Rather than generating a random one here.
273
# root_id = bzrlib.branch.gen_file_id('TREE_ROOT')
274
self.root = RootEntry(root_id)
275
self._byid = {self.root.file_id: self.root}
279
other = Inventory(self.root.file_id)
280
# copy recursively so we know directories will be added before
281
# their children. There are more efficient ways than this...
282
for path, entry in self.iter_entries():
283
if entry == self.root:
285
other.add(entry.copy())
290
return iter(self._byid)
294
"""Returns number of entries."""
295
return len(self._byid)
298
def iter_entries(self, from_dir=None):
299
"""Return (path, entry) pairs, in order by name."""
303
elif isinstance(from_dir, basestring):
304
from_dir = self._byid[from_dir]
306
kids = from_dir.children.items()
308
for name, ie in kids:
310
if ie.kind == 'directory':
311
for cn, cie in self.iter_entries(from_dir=ie.file_id):
312
yield os.path.join(name, cn), cie
316
"""Return list of (path, ie) for all entries except the root.
318
This may be faster than iter_entries.
321
def descend(dir_ie, dir_path):
322
kids = dir_ie.children.items()
324
for name, ie in kids:
325
child_path = os.path.join(dir_path, name)
326
accum.append((child_path, ie))
327
if ie.kind == 'directory':
328
descend(ie, child_path)
330
descend(self.root, '')
334
def directories(self):
335
"""Return (path, entry) pairs for all directories, including the root.
338
def descend(parent_ie, parent_path):
339
accum.append((parent_path, parent_ie))
341
kids = [(ie.name, ie) for ie in parent_ie.children.itervalues() if ie.kind == 'directory']
344
for name, child_ie in kids:
345
child_path = os.path.join(parent_path, name)
346
descend(child_ie, child_path)
347
descend(self.root, '')
352
def __contains__(self, file_id):
353
"""True if this entry contains a file with given id.
355
>>> inv = Inventory()
356
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
357
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
363
return file_id in self._byid
366
def __getitem__(self, file_id):
367
"""Return the entry for given file_id.
369
>>> inv = Inventory()
370
>>> inv.add(InventoryEntry('123123', 'hello.c', 'file', ROOT_ID))
371
InventoryEntry('123123', 'hello.c', kind='file', parent_id='TREE_ROOT')
372
>>> inv['123123'].name
376
return self._byid[file_id]
379
raise BzrError("can't look up file_id None")
381
raise BzrError("file_id {%s} not in inventory" % file_id)
384
def get_file_kind(self, file_id):
385
return self._byid[file_id].kind
387
def get_child(self, parent_id, filename):
388
return self[parent_id].children.get(filename)
391
def add(self, entry):
392
"""Add entry to inventory.
394
To add a file to a branch ready to be committed, use Branch.add,
397
Returns the new entry object.
399
if entry.file_id in self._byid:
400
raise BzrError("inventory already contains entry with id {%s}" % entry.file_id)
402
if entry.parent_id == ROOT_ID or entry.parent_id is None:
403
entry.parent_id = self.root.file_id
406
parent = self._byid[entry.parent_id]
408
raise BzrError("parent_id {%s} not in inventory" % entry.parent_id)
410
if parent.children.has_key(entry.name):
411
raise BzrError("%s is already versioned" %
412
appendpath(self.id2path(parent.file_id), entry.name))
414
self._byid[entry.file_id] = entry
415
parent.children[entry.name] = entry
419
def add_path(self, relpath, kind, file_id=None):
420
"""Add entry from a path.
422
The immediate parent must already be versioned.
424
Returns the new entry object."""
425
from bzrlib.branch import gen_file_id
427
parts = bzrlib.osutils.splitpath(relpath)
429
raise BzrError("cannot re-add root of inventory")
432
file_id = gen_file_id(relpath)
434
parent_path = parts[:-1]
435
parent_id = self.path2id(parent_path)
436
if parent_id == None:
437
raise NotVersionedError(parent_path)
439
ie = InventoryEntry(file_id, parts[-1],
440
kind=kind, parent_id=parent_id)
444
def __delitem__(self, file_id):
445
"""Remove entry by id.
447
>>> inv = Inventory()
448
>>> inv.add(InventoryEntry('123', 'foo.c', 'file', ROOT_ID))
449
InventoryEntry('123', 'foo.c', kind='file', parent_id='TREE_ROOT')
458
assert self[ie.parent_id].children[ie.name] == ie
460
# TODO: Test deleting all children; maybe hoist to a separate
462
if ie.kind == 'directory':
463
for cie in ie.children.values():
464
del self[cie.file_id]
467
del self._byid[file_id]
468
del self[ie.parent_id].children[ie.name]
471
def __eq__(self, other):
472
"""Compare two sets by comparing their contents.
478
>>> i1.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
479
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
482
>>> i2.add(InventoryEntry('123', 'foo', 'file', ROOT_ID))
483
InventoryEntry('123', 'foo', kind='file', parent_id='TREE_ROOT')
487
if not isinstance(other, Inventory):
488
return NotImplemented
490
if len(self._byid) != len(other._byid):
491
# shortcut: obviously not the same
494
return self._byid == other._byid
497
def __ne__(self, other):
498
return not (self == other)
502
raise ValueError('not hashable')
505
def get_idpath(self, file_id):
506
"""Return a list of file_ids for the path to an entry.
508
The list contains one element for each directory followed by
509
the id of the file itself. So the length of the returned list
510
is equal to the depth of the file in the tree, counting the
511
root directory as depth 1.
514
while file_id != None:
516
ie = self._byid[file_id]
518
raise BzrError("file_id {%s} not found in inventory" % file_id)
519
p.insert(0, ie.file_id)
520
file_id = ie.parent_id
524
def id2path(self, file_id):
525
"""Return as a list the path to file_id."""
527
# get all names, skipping root
528
p = [self._byid[fid].name for fid in self.get_idpath(file_id)[1:]]
529
return os.sep.join(p)
533
def path2id(self, name):
534
"""Walk down through directories to return entry of last component.
536
names may be either a list of path components, or a single
537
string, in which case it is automatically split.
539
This returns the entry of the last component in the path,
540
which may be either a file or a directory.
542
Returns None iff the path is not found.
544
if isinstance(name, types.StringTypes):
545
name = splitpath(name)
547
mutter("lookup path %r" % name)
552
cie = parent.children[f]
554
assert cie.parent_id == parent.file_id
560
return parent.file_id
563
def has_filename(self, names):
564
return bool(self.path2id(names))
567
def has_id(self, file_id):
568
return self._byid.has_key(file_id)
571
def rename(self, file_id, new_parent_id, new_name):
572
"""Move a file within the inventory.
574
This can change either the name, or the parent, or both.
576
This does not move the working file."""
577
if not is_valid_name(new_name):
578
raise BzrError("not an acceptable filename: %r" % new_name)
580
new_parent = self._byid[new_parent_id]
581
if new_name in new_parent.children:
582
raise BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id)))
584
new_parent_idpath = self.get_idpath(new_parent_id)
585
if file_id in new_parent_idpath:
586
raise BzrError("cannot move directory %r into a subdirectory of itself, %r"
587
% (self.id2path(file_id), self.id2path(new_parent_id)))
589
file_ie = self._byid[file_id]
590
old_parent = self._byid[file_ie.parent_id]
592
# TODO: Don't leave things messed up if this fails
594
del old_parent.children[file_ie.name]
595
new_parent.children[new_name] = file_ie
597
file_ie.name = new_name
598
file_ie.parent_id = new_parent_id
605
def is_valid_name(name):
608
_NAME_RE = re.compile(r'^[^/\\]+$')
610
return bool(_NAME_RE.match(name))