1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""WorkingTree4 format and implementation.
19
WorkingTree4 provides the dirstate based working tree logic.
21
To get a WorkingTree, call bzrdir.open_workingtree() or
22
WorkingTree.open(dir).
25
from cStringIO import StringIO
28
from bzrlib.lazy_import import lazy_import
29
lazy_import(globals(), """
30
from bisect import bisect_left
32
from copy import deepcopy
43
conflicts as _mod_conflicts,
59
from bzrlib.transport import get_transport
63
from bzrlib import symbol_versioning
64
from bzrlib.decorators import needs_read_lock, needs_write_lock
65
from bzrlib.inventory import InventoryEntry, Inventory, ROOT_ID, make_entry
66
from bzrlib.lockable_files import LockableFiles, TransportLock
67
from bzrlib.lockdir import LockDir
68
import bzrlib.mutabletree
69
from bzrlib.mutabletree import needs_tree_write_lock
70
from bzrlib.osutils import (
82
from bzrlib.trace import mutter, note
83
from bzrlib.transport.local import LocalTransport
84
from bzrlib.progress import DummyProgress, ProgressPhase
85
from bzrlib.revision import NULL_REVISION, CURRENT_REVISION
86
from bzrlib.rio import RioReader, rio_file, Stanza
87
from bzrlib.symbol_versioning import (deprecated_passed,
95
from bzrlib.tree import Tree
96
from bzrlib.workingtree import WorkingTree3, WorkingTreeFormat3
99
class WorkingTree4(WorkingTree3):
100
"""This is the Format 4 working tree.
102
This differs from WorkingTree3 by:
103
- having a consolidated internal dirstate.
104
- not having a regular inventory attribute.
106
This is new in bzr TODO FIXME SETMEBEFORE MERGE.
109
def __init__(self, basedir,
114
"""Construct a WorkingTree for basedir.
116
If the branch is not supplied, it is opened automatically.
117
If the branch is supplied, it must be the branch for this basedir.
118
(branch.base is not cross checked, because for remote branches that
119
would be meaningless).
121
self._format = _format
122
self.bzrdir = _bzrdir
123
from bzrlib.hashcache import HashCache
124
from bzrlib.trace import note, mutter
125
assert isinstance(basedir, basestring), \
126
"base directory %r is not a string" % basedir
127
basedir = safe_unicode(basedir)
128
mutter("opening working tree %r", basedir)
129
self._branch = branch
130
assert isinstance(self.branch, bzrlib.branch.Branch), \
131
"branch %r is not a Branch" % self.branch
132
self.basedir = realpath(basedir)
133
# if branch is at our basedir and is a format 6 or less
134
# assume all other formats have their own control files.
135
assert isinstance(_control_files, LockableFiles), \
136
"_control_files must be a LockableFiles, not %r" % _control_files
137
self._control_files = _control_files
138
# update the whole cache up front and write to disk if anything changed;
139
# in the future we might want to do this more selectively
140
# two possible ways offer themselves : in self._unlock, write the cache
141
# if needed, or, when the cache sees a change, append it to the hash
142
# cache file, and have the parser take the most recent entry for a
144
cache_filename = self.bzrdir.get_workingtree_transport(None).local_abspath('stat-cache')
145
hc = self._hashcache = HashCache(basedir, cache_filename, self._control_files._file_mode)
147
# is this scan needed ? it makes things kinda slow.
155
self._parent_revisions = None
157
# during a read or write lock these objects are set, and are
158
# None the rest of the time.
159
self._dirstate = None
160
self._inventory = None
163
@needs_tree_write_lock
164
def _add(self, files, ids, kinds):
165
"""See MutableTree._add."""
166
state = self.current_dirstate()
167
for f, file_id, kind in zip(files, ids, kinds):
172
file_id = generate_ids.gen_file_id(f)
173
# deliberately add the file with no cached stat or sha1
174
# - on the first access it will be gathered, and we can
175
# always change this once tests are all passing.
176
state.add(f, file_id, kind, None, '')
179
def current_dirstate(self):
    """Return the dirstate object backing this tree, loading it on demand.

    This is not part of the tree interface. The DirState is built from the
    'dirstate' file in the working tree transport the first time it is
    requested, and is then kept on self._dirstate.

    :raises errors.ObjectNotLocked: when the control files hold no lock.
    """
    if self._control_files._lock_count:
        if self._dirstate is None:
            wt_transport = self.bzrdir.get_workingtree_transport(None)
            dirstate_path = wt_transport.local_abspath('dirstate')
            self._dirstate = dirstate.DirState.on_file(dirstate_path)
        return self._dirstate
    raise errors.ObjectNotLocked(self)
198
"""Write all cached data to disk."""
199
if self._control_files._lock_mode != 'w':
200
raise errors.NotWriteLocked(self)
201
self.current_dirstate().save()
202
self._inventory = None
205
def _generate_inventory(self):
206
"""Create and set self.inventory from the dirstate object.
208
This is relatively expensive: we have to walk the entire dirstate.
209
Ideally we would not, and can deprecate this function.
211
dirstate = self.current_dirstate()
212
rows = self._dirstate._iter_rows()
213
root_row = rows.next()
214
inv = Inventory(root_id=root_row[0][3].decode('utf8'))
216
dirname, name, kind, fileid_utf8, size, stat, link_or_sha1 = line[0]
218
# not in this revision tree.
220
parent_id = inv[inv.path2id(dirname.decode('utf8'))].file_id
221
file_id = fileid_utf8.decode('utf8')
222
entry = make_entry(kind, name.decode('utf8'), parent_id, file_id)
224
#entry.executable = executable
225
#entry.text_size = size
226
#entry.text_sha1 = sha1
229
self._inventory = inv
231
def get_file_sha1(self, file_id, path=None, stat_value=None):
233
# path = self.inventory.id2path(file_id)
234
# # now lookup row by path
235
row, parents = self._get_row(file_id=file_id)
236
assert row is not None, 'what error should this raise'
238
# if row stat is valid, use cached sha1, else, get a new sha1.
239
path = (row[0] + '/' + row[1]).strip('/').decode('utf8')
240
return self._hashcache.get_sha1(path, stat_value)
242
def _get_inventory(self):
243
"""Get the inventory for the tree. This is only valid within a lock."""
244
if self._inventory is not None:
245
return self._inventory
246
self._generate_inventory()
247
return self._inventory
249
inventory = property(_get_inventory,
250
doc="Inventory of this Tree")
253
def get_parent_ids(self):
    """See Tree.get_parent_ids.

    The list of ids is obtained from the current dirstate object.
    """
    state = self.current_dirstate()
    return state.get_parent_ids()
261
def get_root_id(self):
    """Return the id of this tree's root.

    The id is taken from the first row yielded by the dirstate and is
    decoded from utf8.
    """
    rows = self.current_dirstate()._iter_rows()
    root_row = rows.next()
    return root_row[0][3].decode('utf8')
265
def _get_row(self, file_id):
266
"""Get the dirstate row for file_id."""
267
state = self.current_dirstate()
268
fileid_utf8 = file_id.encode('utf8')
269
for row in state._iter_rows():
270
if row[0][3] == fileid_utf8:
274
def has_id(self, file_id):
275
state = self.current_dirstate()
276
fileid_utf8 = file_id.encode('utf8')
277
row, parents = self._get_row(file_id)
280
return osutils.lexists(pathjoin(
281
self.basedir, row[0].decode('utf8'), row[1].decode('utf8')))
284
def id2path(self, fileid):
    """Return the path recorded in the dirstate for fileid.

    The dirstate rows are scanned in order; the first row whose id equals
    the utf8 encoding of fileid supplies the dirname and basename, which
    are joined with '/' and decoded from utf8.
    """
    state = self.current_dirstate()
    wanted = fileid.encode('utf8')
    for row, parents in state._iter_rows():
        if row[3] != wanted:
            continue
        joined = row[0] + '/' + row[1]
        return joined.decode('utf8').strip('/')
293
"""Iterate through file_ids for this tree.
295
file_ids are in a WorkingTree if they are in the working inventory
296
and the working file exists.
299
for row, parents in self.current_dirstate()._iter_rows():
302
path = pathjoin(self.basedir, row[0].decode('utf8'), row[1].decode('utf8'))
303
if osutils.lexists(path):
304
result.append(row[3].decode('utf8'))
308
def _last_revision(self):
309
"""See Mutable.last_revision."""
310
parent_ids = self.current_dirstate().get_parent_ids()
312
return parent_ids[0].decode('utf8')
317
"""Initialize the state in this tree to be a new tree."""
318
self._parent_revisions = [NULL_REVISION]
322
def path2id(self, path):
    """Return the id for path in this tree.

    path is utf8 encoded and split into (dirname, basename); the id of the
    first dirstate row with that pair is returned, decoded from utf8.
    """
    state = self.current_dirstate()
    dir_and_name = os.path.split(path.encode('utf8'))
    for row, parents in state._iter_rows():
        if row[0:2] != dir_and_name:
            continue
        return row[3].decode('utf8')
331
def read_working_inventory(self):
    """Read the working inventory.

    For a dirstate tree this is a meaningless operation, but it is obeyed
    anyhow by handing back the inventory attribute.
    """
    inv = self.inventory
    return inv
339
def revision_tree(self, revision_id):
    """See Tree.revision_tree.

    WorkingTree4 supplies revision trees for any basis tree: revision_id
    must be one of the dirstate's parent ids and must not be a ghost.

    :raises errors.NoSuchRevisionInTree: when revision_id is not a parent
        of this tree, or is listed among the dirstate's ghosts.
    """
    state = self.current_dirstate()
    is_parent = revision_id in state.get_parent_ids()
    # Ghosts are only consulted once the revision is known to be a parent.
    if not is_parent or revision_id in state.get_ghosts():
        raise errors.NoSuchRevisionInTree(self, revision_id)
    return DirStateRevisionTree(state, revision_id, self.branch.repository)
353
@needs_tree_write_lock
354
def set_last_revision(self, new_revision):
355
"""Change the last revision in the working tree."""
356
parents = self.get_parent_ids()
357
if new_revision in (NULL_REVISION, None):
358
assert parents == [], (
359
"setting the last parent to none with a pending merge is "
361
self.set_parent_ids([])
363
self.set_parent_ids([new_revision] + parents[1:])
365
@needs_tree_write_lock
366
def set_parent_ids(self, revision_ids, allow_leftmost_as_ghost=False):
367
"""Set the parent ids to revision_ids.
369
See also set_parent_trees. This api will try to retrieve the tree data
370
for each element of revision_ids from the trees repository. If you have
371
tree data already available, it is more efficient to use
372
set_parent_trees rather than set_parent_ids. set_parent_ids is however
373
an easier API to use.
375
:param revision_ids: The revision_ids to set as the parent ids of this
376
working tree. Any of these may be ghosts.
379
for revision_id in revision_ids:
381
revtree = self.branch.repository.revision_tree(revision_id)
382
# TODO: jam 20070213 KnitVersionedFile raises
383
# RevisionNotPresent rather than NoSuchRevision if a
384
# given revision_id is not present. Should Repository be
385
# catching it and re-raising NoSuchRevision?
386
except (errors.NoSuchRevision, errors.RevisionNotPresent):
388
trees.append((revision_id, revtree))
389
self.set_parent_trees(trees,
390
allow_leftmost_as_ghost=allow_leftmost_as_ghost)
392
@needs_tree_write_lock
393
def set_parent_trees(self, parents_list, allow_leftmost_as_ghost=False):
394
"""Set the parents of the working tree.
396
:param parents_list: A list of (revision_id, tree) tuples.
397
If tree is None, then that element is treated as an unreachable
398
parent tree - i.e. a ghost.
400
dirstate = self.current_dirstate()
401
if len(parents_list) > 0:
402
if not allow_leftmost_as_ghost and parents_list[0][1] is None:
403
raise errors.GhostRevisionUnusableHere(parents_list[0][0])
406
# convert absent trees to the null tree, which we convert back to
408
for rev_id, tree in parents_list:
410
real_trees.append((rev_id, tree))
412
real_trees.append((rev_id,
413
self.branch.repository.revision_tree(None)))
414
ghosts.append(rev_id)
415
dirstate.set_parent_trees(real_trees, ghosts=ghosts)
418
def _set_root_id(self, file_id):
    """See WorkingTree.set_root_id.

    Sets the id of the root path ('') in the dirstate, then records on
    self._dirty whether the dirstate now holds in-memory modifications.
    """
    state = self.current_dirstate()
    state.set_path_id('', file_id)
    block_state = state._dirblock_state
    self._dirty = (block_state == dirstate.DirState.IN_MEMORY_MODIFIED)
425
"""Unlock in format 4 trees needs to write the entire dirstate."""
426
if self._control_files._lock_count == 1:
427
self._write_hashcache_if_dirty()
428
# eventually we should do signature checking during read locks for
430
if self._control_files._lock_mode == 'w':
433
self._dirstate = None
434
self._inventory = None
435
# reverse order of locking.
437
return self._control_files.unlock()
441
@needs_tree_write_lock
442
def unversion(self, file_ids):
443
"""Remove the file ids in file_ids from the current versioned set.
445
When a file_id is unversioned, all of its children are automatically
448
:param file_ids: The file ids to stop versioning.
449
:raises: NoSuchId if any fileid is not currently versioned.
453
state = self.current_dirstate()
454
state._read_dirblocks_if_needed()
455
ids_to_unversion = set()
456
for fileid in file_ids:
457
ids_to_unversion.add(fileid.encode('utf8'))
458
paths_to_unversion = set()
460
# check if the root is to be unversioned, if so, assert for now.
461
# make a copy of the _dirblocks data
463
# skip paths in paths_to_unversion
464
# skip ids in ids_to_unversion, and add their paths to
465
# paths_to_unversion if they are a directory
466
# if there are any un-unversioned ids at the end, raise
467
if state._root_row[0][3] in ids_to_unversion:
468
# I haven't written the code to unversion / yet - it should be
470
raise errors.BzrError('Unversioning the / is not currently supported')
473
for block in state._dirblocks:
474
# first check: is the path one to remove - it or its children
476
for path in paths_to_unversion:
477
if (block[0].startswith(path) and
478
(len(block[0]) == len(path) or
479
block[0][len(path)] == '/')):
480
# this path should be deleted
483
# TODO: trim paths_to_unversion as we pass by paths
485
# this block is to be deleted. skip it.
487
# copy undeleted rows from within the block
488
new_blocks.append((block[0], []))
489
new_row = new_blocks[-1][1]
490
for row, row_parents in block[1]:
491
if row[3] not in ids_to_unversion:
492
new_row.append((row, row_parents))
494
# skip the row, and if it's a dir mark its path to be removed
495
if row[2] == 'directory':
496
paths_to_unversion.add((row[0] + '/' + row[1]).strip('/'))
498
deleted_rows.append((row[3], row_parents))
499
ids_to_unversion.remove(row[3])
501
raise errors.NoSuchId(self, iter(ids_to_unversion).next())
502
state._dirblocks = new_blocks
503
for fileid_utf8, parents in deleted_rows:
504
state.add_deleted(fileid_utf8, parents)
505
state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
506
# have to change the legacy inventory too.
507
if self._inventory is not None:
508
for file_id in file_ids:
509
self._inventory.remove_recursive_id(file_id)
511
@needs_tree_write_lock
512
def _write_inventory(self, inv):
513
"""Write inventory as the current inventory."""
514
assert not self._dirty, "attempting to write an inventory when the dirstate is dirty will cause data loss"
515
self.current_dirstate().set_state_from_inventory(inv)
520
class WorkingTreeFormat4(WorkingTreeFormat3):
521
"""The first consolidated dirstate working tree format.
524
- exists within a metadir controlling .bzr
525
- includes an explicit version marker for the workingtree control
526
files, separate from the BzrDir format
527
- modifies the hash cache format
528
- is new in bzr TODO FIXME SETBEFOREMERGE
529
- uses a LockDir to guard access to it.
532
def get_format_string(self):
    """See WorkingTreeFormat.get_format_string().

    Returns the marker text identifying format 4 working trees.
    """
    format_string = "Bazaar Working Tree format 4\n"
    return format_string
536
def get_format_description(self):
    """See WorkingTreeFormat.get_format_description().

    Returns the short human readable name of this format.
    """
    description = "Working tree format 4"
    return description
540
def initialize(self, a_bzrdir, revision_id=None):
541
"""See WorkingTreeFormat.initialize().
543
revision_id allows creating a working tree at a different
544
revision than the branch is at.
546
if not isinstance(a_bzrdir.transport, LocalTransport):
547
raise errors.NotLocalUrl(a_bzrdir.transport.base)
548
transport = a_bzrdir.get_workingtree_transport(self)
549
control_files = self._open_control_files(a_bzrdir)
550
control_files.create_lock()
551
control_files.lock_write()
552
control_files.put_utf8('format', self.get_format_string())
553
branch = a_bzrdir.open_branch()
554
if revision_id is None:
555
revision_id = branch.last_revision()
556
local_path = transport.local_abspath('dirstate')
557
dirstate.DirState.initialize(local_path)
558
wt = WorkingTree4(a_bzrdir.root_transport.local_abspath('.'),
562
_control_files=control_files)
566
#wt.current_dirstate().set_path_id('', NEWROOT)
567
wt.set_last_revision(revision_id)
569
basis = wt.basis_tree()
571
transform.build_tree(basis, wt)
574
control_files.unlock()
579
def _open(self, a_bzrdir, control_files):
580
"""Open the tree itself.
582
:param a_bzrdir: the dir for the tree.
583
:param control_files: the control files for the tree.
585
return WorkingTree4(a_bzrdir.root_transport.local_abspath('.'),
586
branch=a_bzrdir.open_branch(),
589
_control_files=control_files)
592
class DirStateRevisionTree(Tree):
593
"""A revision tree pulling the inventory from a dirstate."""
595
def __init__(self, dirstate, revision_id, repository):
596
self._dirstate = dirstate
597
self._revision_id = revision_id
598
self._repository = repository
599
self._inventory = None
602
def _comparison_data(self, entry, path):
603
"""See Tree._comparison_data."""
605
return None, False, None
606
# trust the entry as RevisionTree does, but this may not be
607
# sensible: the entry might not have come from us?
608
return entry.kind, entry.executable, None
610
def _file_size(self, entry, stat_value):
611
return entry.text_size
613
def _generate_inventory(self):
614
"""Create and set self.inventory from the dirstate object.
616
This is relatively expensive: we have to walk the entire dirstate.
617
Ideally we would not, and instead would """
618
assert self._locked, 'cannot generate inventory of an unlocked '\
619
'dirstate revision tree'
620
assert self._revision_id in self._dirstate.get_parent_ids(), \
621
'parent %s has disappeared from %s' % (
622
self._revision_id, self._dirstate.get_parent_ids())
623
parent_index = self._dirstate.get_parent_ids().index(self._revision_id)
624
rows = self._dirstate._iter_rows()
625
root_row = rows.next()
626
inv = Inventory(root_id=root_row[0][3].decode('utf8'),
627
revision_id=self._revision_id)
629
revid, kind, dirname, name, size, executable, sha1 = line[1][parent_index]
631
# not in this revision tree.
633
parent_id = inv[inv.path2id(dirname.decode('utf8'))].file_id
634
file_id = line[0][3].decode('utf8')
635
entry = make_entry(kind, name.decode('utf8'), parent_id, file_id)
636
entry.revision = revid.decode('utf8')
638
entry.executable = executable
639
entry.text_size = size
640
entry.text_sha1 = sha1
642
self._inventory = inv
644
def get_file_sha1(self, file_id, path=None, stat_value=None):
645
# TODO: if path is present, fast-path on that, as inventory
646
# might not be present
647
ie = self.inventory[file_id]
648
if ie.kind == "file":
652
def get_file(self, file_id):
    """Return a StringIO wrapping the text of file_id."""
    text = self.get_file_text(file_id)
    return StringIO(text)
655
def get_file_lines(self, file_id):
    """Return the lines of file_id's text from the repository weave store.

    The version fetched is the revision recorded on the inventory entry
    for file_id.
    """
    entry = self.inventory[file_id]
    repo = self._repository
    weave = repo.weave_store.get_weave(file_id, repo.get_transaction())
    return weave.get_lines(entry.revision)
660
def get_file_size(self, file_id):
    """Return the text size recorded in the inventory for file_id."""
    entry = self.inventory[file_id]
    return entry.text_size
663
def get_file_text(self, file_id):
    """Return the text of file_id as one string, joined from its lines."""
    lines = self.get_file_lines(file_id)
    return ''.join(lines)
666
def _get_inventory(self):
667
if self._inventory is not None:
668
return self._inventory
669
self._generate_inventory()
670
return self._inventory
672
inventory = property(_get_inventory,
673
doc="Inventory of this Tree")
675
def get_parent_ids(self):
    """Return the parent ids of this tree's revision.

    The parents of a tree in the dirstate are not cached, so the revision
    is fetched from the repository each time.
    """
    revision = self._repository.get_revision(self._revision_id)
    return revision.parent_ids
679
def has_filename(self, filename):
    """Return True when the inventory maps filename to a truthy file id."""
    file_id = self.inventory.path2id(filename)
    return bool(file_id)
682
def kind(self, file_id):
    """Return the kind recorded on the inventory entry for file_id."""
    entry = self.inventory[file_id]
    return entry.kind
685
def is_executable(self, file_id, path=None):
686
ie = self.inventory[file_id]
687
if ie.kind != "file":
692
"""Lock the tree for a set of operations."""
696
"""Unlock, freeing any cache memory used during the lock."""
697
# outside of a lock, the inventory is suspect: release it.
700
self._inventory = None