1
# Copyright (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
import sys, os, os.path, random, time, sha, sets, types, re, shutil, tempfile
21
import traceback, socket, fnmatch, difflib, time
22
from binascii import hexlify
25
from inventory import Inventory
26
from trace import mutter, note
27
from tree import Tree, EmptyTree, RevisionTree, WorkingTree
28
from inventory import InventoryEntry, Inventory
29
from osutils import isdir, quotefn, isfile, uuid, sha_file, username, chomp, \
30
format_date, compact_date, pumpfile, user_email, rand_bytes, splitpath, \
31
joinpath, sha_string, file_kind, local_time_offset, appendpath
32
from store import ImmutableStore
33
from revision import Revision
34
from errors import bailout, BzrError
35
from textui import show_status
36
from diff import diff_trees
38
# Exact contents of the 'branch-format' control file: written by
# Branch._make_control and compared against by Branch._check_format.
BZR_BRANCH_FORMAT = "Bazaar-NG branch, format 0.0.4\n"
39
## TODO: Maybe include checks for common corruption of newlines, etc?
43
def find_branch_root(f=None):
44
"""Find the branch root enclosing f, or pwd.
46
It is not necessary that f exists.
48
Basically we keep looking up until we find the control directory or
52
elif hasattr(os.path, 'realpath'):
53
f = os.path.realpath(f)
55
f = os.path.abspath(f)
60
if os.path.exists(os.path.join(f, bzrlib.BZRDIR)):
62
head, tail = os.path.split(f)
64
# reached the root, whatever that may be
65
raise BzrError('%r is not in a branch' % orig_f)
70
######################################################################
74
"""Branch holding a history of revisions.
76
:todo: Perhaps use different stores for different classes of object,
77
so that we can keep track of how much space each one uses,
78
or garbage-collect them.
80
:todo: Add a RemoteBranch subclass. For the basic case of read-only
81
HTTP access this should be very easy by,
82
just redirecting controlfile access into HTTP requests.
83
We would need a RemoteStore working similarly.
85
:todo: Keep the on-disk branch locked while the object exists.
87
:todo: mkdir() method.
89
def __init__(self, base, init=False, find_root=True):
90
"""Create new branch object at a particular location.
92
:param base: Base directory for the branch.
94
:param init: If True, create new control files in a previously
95
unversioned directory. If False, the branch must already
98
:param find_root: If true and init is false, find the root of the
99
existing branch containing base.
101
In the test suite, creation of new trees is tested using the
102
`ScratchBranch` class.
105
self.base = os.path.realpath(base)
108
self.base = find_branch_root(base)
110
self.base = os.path.realpath(base)
111
if not isdir(self.controlfilename('.')):
112
bailout("not a bzr branch: %s" % quotefn(base),
113
['use "bzr init" to initialize a new working tree',
114
'current bzr can only operate from top-of-tree'])
117
self.text_store = ImmutableStore(self.controlfilename('text-store'))
118
self.revision_store = ImmutableStore(self.controlfilename('revision-store'))
119
self.inventory_store = ImmutableStore(self.controlfilename('inventory-store'))
123
return '%s(%r)' % (self.__class__.__name__, self.base)
129
def abspath(self, name):
    """Return the absolute filename for something in the branch.

    :param name: Path of the item, relative to the branch base directory.
    :return: `name` joined onto `self.base` with `os.path.join`.
    """
    return os.path.join(self.base, name)
134
def relpath(self, path):
135
"""Return path relative to this branch of something inside it.
137
Raises an error if path is not in this branch."""
138
rp = os.path.realpath(path)
140
if not rp.startswith(self.base):
141
bailout("path %r is not within branch %r" % (rp, self.base))
142
rp = rp[len(self.base):]
143
rp = rp.lstrip(os.sep)
147
def controlfilename(self, file_or_path):
    """Return the filesystem path of something in the control directory.

    :param file_or_path: Either a single name (a string), or a sequence
        of path components; an empty sequence names the control
        directory itself.
    :return: The components joined below `self.base` and `bzrlib.BZRDIR`.
    """
    # A bare string is a single path component, not a sequence of them.
    if isinstance(file_or_path, basestring):
        file_or_path = [file_or_path]
    return os.path.join(self.base, bzrlib.BZRDIR, *file_or_path)
154
def controlfile(self, file_or_path, mode='r'):
    """Open a control file for this branch.

    :param file_or_path: Name or path components, resolved by
        `controlfilename`.
    :param mode: Mode string passed unchanged to `open`.
    :return: The open file object.  The caller owns it and is
        responsible for closing it.
    """
    return open(self.controlfilename(file_or_path), mode)
159
def _make_control(self):
    """Create the control directory for a new branch and populate it.

    In order, this creates: the control directory itself, a README,
    the 'branch-format' marker, the three store directories, a set of
    empty control files, and an 'inventory' file holding the XML of a
    freshly constructed `Inventory`.
    """
    def _write(name, mode, text):
        # Close each file explicitly instead of relying on the handle
        # being finalised when its last reference goes away.
        f = self.controlfile(name, mode)
        try:
            f.write(text)
        finally:
            f.close()

    # An empty component list names the control directory itself.
    os.mkdir(self.controlfilename([]))
    _write('README', 'w',
           "This is a Bazaar-NG control directory.\n"
           "Do not change any files in this directory.")
    _write('branch-format', 'wb', BZR_BRANCH_FORMAT)
    for d in ('text-store', 'inventory-store', 'revision-store'):
        os.mkdir(self.controlfilename(d))
    for name in ('revision-history', 'merged-patches',
                 'pending-merged-patches', 'branch-name'):
        _write(name, 'w', '')
    mutter('created control directory in ' + self.base)
    inv_file = self.controlfile('inventory', 'w')
    try:
        Inventory().write_xml(inv_file)
    finally:
        inv_file.close()
174
def _check_format(self):
    """Check this branch format is supported.

    The current tool only supports the current unstable format.

    In the future, we might need different in-memory Branch
    classes to support downlevel branches.  But not yet.

    Calls `bailout` if the 'branch-format' control file does not match
    `BZR_BRANCH_FORMAT`.
    """
    f = self.controlfile('branch-format', 'rb')
    try:
        fmt = f.read()
    finally:
        f.close()
    # Normalise Windows line endings so that we can open branches
    # created on Windows from Linux and so on.  I think it might be
    # better to always make all internal files in unix format.
    #
    # str.replace returns a new string, so the result has to be rebound.
    # CRLF maps to LF (not to nothing) because BZR_BRANCH_FORMAT itself
    # ends with a newline.
    fmt = fmt.replace('\r\n', '\n')
    if fmt != BZR_BRANCH_FORMAT:
        bailout('sorry, branch format %r not supported' % fmt,
                ['use a different bzr version',
                 'or remove the .bzr directory and "bzr init" again'])
193
def read_working_inventory(self):
194
"""Read the working inventory."""
196
inv = Inventory.read_xml(self.controlfile('inventory', 'r'))
197
mutter("loaded inventory of %d items in %f"
198
% (len(inv), time.time() - before))
202
def _write_inventory(self, inv):
203
"""Update the working inventory.
205
That is to say, the inventory describing changes underway, that
206
will be committed to the next revision.
208
## TODO: factor out to atomicfile? is rename safe on windows?
209
## TODO: Maybe some kind of clean/dirty marker on inventory?
210
tmpfname = self.controlfilename('inventory.tmp')
211
tmpf = file(tmpfname, 'w')
214
inv_fname = self.controlfilename('inventory')
215
if sys.platform == 'win32':
217
os.rename(tmpfname, inv_fname)
218
mutter('wrote working inventory')
221
# Reading `branch.inventory` calls read_working_inventory(); assigning to
# it calls _write_inventory().  No deleter is provided.
inventory = property(read_working_inventory, _write_inventory, None,
                     """Inventory for the working copy.""")
225
def add(self, files, verbose=False):
226
"""Make files versioned.
228
This puts the files in the Added state, so that they will be
229
recorded by the next commit.
231
:todo: Perhaps have an option to add the ids even if the files do
234
:todo: Perhaps return the ids of the files? But then again it
235
is easy to retrieve them if they're needed.
237
:todo: Option to specify file id.
239
:todo: Adding a directory should optionally recurse down and
240
add all non-ignored children. Perhaps do that in a
243
>>> b = ScratchBranch(files=['foo'])
244
>>> 'foo' in b.unknowns()
249
>>> 'foo' in b.unknowns()
251
>>> bool(b.inventory.path2id('foo'))
257
Traceback (most recent call last):
259
BzrError: ('foo is already versioned', [])
261
>>> b.add(['nothere'])
262
Traceback (most recent call last):
263
BzrError: ('cannot add: not a regular file or directory: nothere', [])
266
# TODO: Re-adding a file that is removed in the working copy
267
# should probably put it back with the previous ID.
268
if isinstance(files, types.StringTypes):
271
inv = self.read_working_inventory()
273
if is_control_file(f):
274
bailout("cannot add control file %s" % quotefn(f))
279
bailout("cannot add top-level %r" % f)
281
fullpath = os.path.normpath(self.abspath(f))
284
kind = file_kind(fullpath)
286
# maybe something better?
287
bailout('cannot add: not a regular file or directory: %s' % quotefn(f))
289
if kind != 'file' and kind != 'directory':
290
bailout('cannot add: not a regular file or directory: %s' % quotefn(f))
292
file_id = gen_file_id(f)
293
inv.add_path(f, kind=kind, file_id=file_id)
296
show_status('A', kind, quotefn(f))
298
mutter("add file %s file_id:{%s} kind=%r" % (f, file_id, kind))
300
self._write_inventory(inv)
303
def print_file(self, file, revno):
304
"""Print `file` to stdout."""
305
tree = self.revision_tree(self.lookup_revision(revno))
306
# use inventory as it was in that revision
307
file_id = tree.inventory.path2id(file)
309
bailout("%r is not present in revision %d" % (file, revno))
310
tree.print_file(file_id)
313
def remove(self, files, verbose=False):
314
"""Mark nominated files for removal from the inventory.
316
This does not remove their text. This does not run on
318
:todo: Refuse to remove modified files unless --force is given?
320
>>> b = ScratchBranch(files=['foo'])
322
>>> b.inventory.has_filename('foo')
325
>>> b.working_tree().has_filename('foo')
327
>>> b.inventory.has_filename('foo')
330
>>> b = ScratchBranch(files=['foo'])
335
>>> b.inventory.has_filename('foo')
337
>>> b.basis_tree().has_filename('foo')
339
>>> b.working_tree().has_filename('foo')
342
:todo: Do something useful with directories.
344
:todo: Should this remove the text or not? Tough call; not
345
removing may be useful and the user can just use rm, and
346
is the opposite of add. Removing it is consistent with most
347
other tools. Maybe an option.
349
## TODO: Normalize names
350
## TODO: Remove nested loops; better scalability
352
if isinstance(files, types.StringTypes):
355
tree = self.working_tree()
358
# do this before any modifications
362
bailout("cannot remove unversioned file %s" % quotefn(f))
363
mutter("remove inventory entry %s {%s}" % (quotefn(f), fid))
365
# having removed it, it must be either ignored or unknown
366
if tree.is_ignored(f):
370
show_status(new_status, inv[fid].kind, quotefn(f))
373
self._write_inventory(inv)
377
"""Return all unknown files.
379
These are files in the working directory that are not versioned or
380
control files or ignored.
382
>>> b = ScratchBranch(files=['foo', 'foo~'])
383
>>> list(b.unknowns())
386
>>> list(b.unknowns())
389
>>> list(b.unknowns())
392
return self.working_tree().unknowns()
395
def commit(self, message, timestamp=None, timezone=None,
398
"""Commit working copy as a new revision.
400
The basic approach is to add all the file texts into the
401
store, then the inventory, then make a new revision pointing
402
to that inventory and store that.
404
This is not quite safe if the working copy changes during the
405
commit; for the moment that is simply not allowed. A better
406
approach is to make a temporary copy of the files before
407
computing their hashes, and then add those hashes in turn to
408
the inventory. This should mean at least that there are no
409
broken hash pointers. There is no way we can get a snapshot
410
of the whole directory at an instant. This would also have to
411
be robust against files disappearing, moving, etc. So the
412
whole thing is a bit hard.
414
:param timestamp: if not None, seconds-since-epoch for a
415
postdated/predated commit.
418
## TODO: Show branch names
420
# TODO: Don't commit if there are no changes, unless forced?
422
# First walk over the working inventory; and both update that
423
# and also build a new revision inventory. The revision
424
# inventory needs to hold the text-id, sha1 and size of the
425
# actual file versions committed in the revision. (These are
426
# not present in the working inventory.) We also need to
427
# detect missing/deleted files, and remove them from the
430
work_inv = self.read_working_inventory()
432
basis = self.basis_tree()
433
basis_inv = basis.inventory
435
for path, entry in work_inv.iter_entries():
436
## TODO: Cope with files that have gone missing.
438
## TODO: Check that the file kind has not changed from the previous
439
## revision of this file (if any).
443
p = self.abspath(path)
444
file_id = entry.file_id
445
mutter('commit prep file %s, id %r ' % (p, file_id))
447
if not os.path.exists(p):
448
mutter(" file is missing, removing from inventory")
450
show_status('D', entry.kind, quotefn(path))
451
missing_ids.append(file_id)
454
# TODO: Handle files that have been deleted
456
# TODO: Maybe a special case for empty files? Seems a
457
# waste to store them many times.
461
if basis_inv.has_id(file_id):
462
old_kind = basis_inv[file_id].kind
463
if old_kind != entry.kind:
464
bailout("entry %r changed kind from %r to %r"
465
% (file_id, old_kind, entry.kind))
467
if entry.kind == 'directory':
469
bailout("%s is entered as directory but not a directory" % quotefn(p))
470
elif entry.kind == 'file':
472
bailout("%s is entered as file but is not a file" % quotefn(p))
474
content = file(p, 'rb').read()
476
entry.text_sha1 = sha_string(content)
477
entry.text_size = len(content)
479
old_ie = basis_inv.has_id(file_id) and basis_inv[file_id]
481
and (old_ie.text_size == entry.text_size)
482
and (old_ie.text_sha1 == entry.text_sha1)):
483
## assert content == basis.get_file(file_id).read()
484
entry.text_id = basis_inv[file_id].text_id
485
mutter(' unchanged from previous text_id {%s}' %
489
entry.text_id = gen_file_id(entry.name)
490
self.text_store.add(content, entry.text_id)
491
mutter(' stored with text_id {%s}' % entry.text_id)
495
elif (old_ie.name == entry.name
496
and old_ie.parent_id == entry.parent_id):
501
show_status(state, entry.kind, quotefn(path))
503
for file_id in missing_ids:
504
# have to do this later so we don't mess up the iterator.
505
# since parents may be removed before their children we
508
# FIXME: There's probably a better way to do this; perhaps
509
# the workingtree should know how to filter itself.
510
if work_inv.has_id(file_id):
511
del work_inv[file_id]
514
inv_id = rev_id = _gen_revision_id(time.time())
516
inv_tmp = tempfile.TemporaryFile()
517
inv.write_xml(inv_tmp)
519
self.inventory_store.add(inv_tmp, inv_id)
520
mutter('new inventory_id is {%s}' % inv_id)
522
self._write_inventory(work_inv)
524
if timestamp == None:
525
timestamp = time.time()
527
if committer == None:
528
committer = username()
531
timezone = local_time_offset()
533
mutter("building commit log message")
534
rev = Revision(timestamp=timestamp,
537
precursor = self.last_patch(),
542
rev_tmp = tempfile.TemporaryFile()
543
rev.write_xml(rev_tmp)
545
self.revision_store.add(rev_tmp, rev_id)
546
mutter("new revision_id is {%s}" % rev_id)
548
## XXX: Everything up to here can simply be orphaned if we abort
549
## the commit; it will leave junk files behind but that doesn't
552
## TODO: Read back the just-generated changeset, and make sure it
553
## applies and recreates the right state.
555
## TODO: Also calculate and store the inventory SHA1
556
mutter("committing patch r%d" % (self.revno() + 1))
558
mutter("append to revision-history")
559
f = self.controlfile('revision-history', 'at')
560
f.write(rev_id + '\n')
564
note("commited r%d" % self.revno())
567
def get_revision(self, revision_id):
568
"""Return the Revision object for a named revision"""
569
r = Revision.read_xml(self.revision_store[revision_id])
570
assert r.revision_id == revision_id
574
def get_inventory(self, inventory_id):
575
"""Get Inventory object by hash.
577
:todo: Perhaps for this and similar methods, take a revision
578
parameter which can be either an integer revno or a
580
i = Inventory.read_xml(self.inventory_store[inventory_id])
584
def get_revision_inventory(self, revision_id):
585
"""Return inventory of a past revision."""
586
if revision_id == None:
589
return self.get_inventory(self.get_revision(revision_id).inventory_id)
592
def revision_history(self):
    """Return the list of revision ids recorded for this branch.

    One entry is read per line of the 'revision-history' control file,
    with `chomp` applied to each line.  `commit` appends to that file,
    so entries appear in the order they were committed.

    >>> ScratchBranch().revision_history()
    []
    """
    f = self.controlfile('revision-history')
    try:
        return [chomp(line) for line in f.readlines()]
    finally:
        # Close explicitly rather than waiting for the handle to be
        # garbage-collected.
        f.close()
602
"""Return current revision number for this branch.
604
That is equivalent to the number of revisions committed to
607
>>> b = ScratchBranch()
610
>>> b.commit('no foo')
614
return len(self.revision_history())
617
def last_patch(self):
618
"""Return last patch hash, or None if no history.
620
>>> ScratchBranch().last_patch() == None
623
ph = self.revision_history()
630
def lookup_revision(self, revno):
631
"""Return revision hash for revision number."""
636
# list is 0-based; revisions are 1-based
637
return self.revision_history()[revno-1]
639
raise BzrError("no such revision %s" % revno)
642
def revision_tree(self, revision_id):
643
"""Return Tree for a revision on this branch.
645
`revision_id` may be None for the null revision, in which case
646
an `EmptyTree` is returned."""
648
if revision_id == None:
651
inv = self.get_revision_inventory(revision_id)
652
return RevisionTree(self.text_store, inv)
655
def working_tree(self):
    """Return a `Tree` for the working copy.

    The result is a `WorkingTree` built from the branch base directory
    and a freshly read working inventory.
    """
    inv = self.read_working_inventory()
    return WorkingTree(self.base, inv)
660
def basis_tree(self):
661
"""Return `Tree` object for last revision.
663
If there are no revisions yet, return an `EmptyTree`.
665
>>> b = ScratchBranch(files=['foo'])
666
>>> b.basis_tree().has_filename('foo')
668
>>> b.working_tree().has_filename('foo')
671
>>> b.commit('add foo')
672
>>> b.basis_tree().has_filename('foo')
675
r = self.last_patch()
679
return RevisionTree(self.text_store, self.get_revision_inventory(r))
683
def write_log(self, show_timezone='original'):
684
"""Write out human-readable log of commits to this branch
686
:param utc: If true, show dates in universal time, not local time."""
687
## TODO: Option to choose either original, utc or local timezone
690
for p in self.revision_history():
692
print 'revno:', revno
693
## TODO: Show hash if --id is given.
694
##print 'revision-hash:', p
695
rev = self.get_revision(p)
696
print 'committer:', rev.committer
697
print 'timestamp: %s' % (format_date(rev.timestamp, rev.timezone or 0,
700
## opportunistic consistency check, same as check_patch_chaining
701
if rev.precursor != precursor:
702
bailout("mismatched precursor!")
706
print ' (no message)'
708
for l in rev.message.split('\n'):
715
def rename_one(self, from_rel, to_rel):
716
tree = self.working_tree()
718
if not tree.has_filename(from_rel):
719
bailout("can't rename: old working file %r does not exist" % from_rel)
720
if tree.has_filename(to_rel):
721
bailout("can't rename: new working file %r already exists" % to_rel)
723
file_id = inv.path2id(from_rel)
725
bailout("can't rename: old name %r is not versioned" % from_rel)
727
if inv.path2id(to_rel):
728
bailout("can't rename: new name %r is already versioned" % to_rel)
730
to_dir, to_tail = os.path.split(to_rel)
731
to_dir_id = inv.path2id(to_dir)
732
if to_dir_id == None and to_dir != '':
733
bailout("can't determine destination directory id for %r" % to_dir)
735
mutter("rename_one:")
736
mutter(" file_id {%s}" % file_id)
737
mutter(" from_rel %r" % from_rel)
738
mutter(" to_rel %r" % to_rel)
739
mutter(" to_dir %r" % to_dir)
740
mutter(" to_dir_id {%s}" % to_dir_id)
742
inv.rename(file_id, to_dir_id, to_tail)
744
print "%s => %s" % (from_rel, to_rel)
746
from_abs = self.abspath(from_rel)
747
to_abs = self.abspath(to_rel)
749
os.rename(from_abs, to_abs)
751
bailout("failed to rename %r to %r: %s"
752
% (from_abs, to_abs, e[1]),
753
["rename rolled back"])
755
self._write_inventory(inv)
759
def move(self, from_paths, to_name):
762
to_name must exist as a versioned directory.
764
If to_name exists and is a directory, the files are moved into
765
it, keeping their old names. If it is a directory,
767
Note that to_name is only the last component of the new name;
768
this doesn't change the directory.
770
## TODO: Option to move IDs only
771
assert not isinstance(from_paths, basestring)
772
tree = self.working_tree()
774
to_abs = self.abspath(to_name)
775
if not isdir(to_abs):
776
bailout("destination %r is not a directory" % to_abs)
777
if not tree.has_filename(to_name):
778
bailout("destination %r not in working directory" % to_abs)
779
to_dir_id = inv.path2id(to_name)
780
if to_dir_id == None and to_name != '':
781
bailout("destination %r is not a versioned directory" % to_name)
782
to_dir_ie = inv[to_dir_id]
783
if to_dir_ie.kind not in ('directory', 'root_directory'):
784
bailout("destination %r is not a directory" % to_abs)
786
to_idpath = Set(inv.get_idpath(to_dir_id))
789
if not tree.has_filename(f):
790
bailout("%r does not exist in working tree" % f)
791
f_id = inv.path2id(f)
793
bailout("%r is not versioned" % f)
794
name_tail = splitpath(f)[-1]
795
dest_path = appendpath(to_name, name_tail)
796
if tree.has_filename(dest_path):
797
bailout("destination %r already exists" % dest_path)
798
if f_id in to_idpath:
799
bailout("can't move %r to a subdirectory of itself" % f)
801
# OK, so there's a race here, it's possible that someone will
802
# create a file in this interval and then the rename might be
803
# left half-done. But we should have caught most problems.
806
name_tail = splitpath(f)[-1]
807
dest_path = appendpath(to_name, name_tail)
808
print "%s => %s" % (f, dest_path)
809
inv.rename(inv.path2id(f), to_dir_id, name_tail)
811
os.rename(self.abspath(f), self.abspath(dest_path))
813
bailout("failed to rename %r to %r: %s" % (f, dest_path, e[1]),
814
["rename rolled back"])
816
self._write_inventory(inv)
820
def show_status(self, show_all=False):
821
"""Display single-line status for non-ignored working files.
823
The list is shown sorted in order by file name.
825
>>> b = ScratchBranch(files=['foo', 'foo~'])
831
>>> b.commit("add foo")
833
>>> os.unlink(b.abspath('foo'))
838
:todo: Get state for single files.
840
:todo: Perhaps show a slash at the end of directory names.
844
# We have to build everything into a list first so that it can
845
# be sorted by name, incorporating all the different sources.
847
# FIXME: Rather than getting things in random order and then sorting,
848
# just step through in order.
850
# Interesting case: the old ID for a file has been removed,
851
# but a new file has been created under that name.
853
old = self.basis_tree()
854
new = self.working_tree()
856
for fs, fid, oldname, newname, kind in diff_trees(old, new):
858
show_status(fs, kind,
859
oldname + ' => ' + newname)
860
elif fs == 'A' or fs == 'M':
861
show_status(fs, kind, newname)
863
show_status(fs, kind, oldname)
866
show_status(fs, kind, newname)
869
show_status(fs, kind, newname)
871
show_status(fs, kind, newname)
873
bailout("wierd file state %r" % ((fs, fid),))
877
class ScratchBranch(Branch):
878
"""Special test class: a branch that cleans up after itself.
880
>>> b = ScratchBranch()
888
def __init__(self, files=[], dirs=[]):
889
"""Make a test branch.
891
This creates a temporary directory and runs init-tree in it.
893
If any files are listed, they are created in the working copy.
895
Branch.__init__(self, tempfile.mkdtemp(), init=True)
897
os.mkdir(self.abspath(d))
900
file(os.path.join(self.base, f), 'w').write('content of %s' % f)
904
"""Destroy the test branch, removing the scratch directory."""
906
shutil.rmtree(self.base)
908
# Work around for shutil.rmtree failing on Windows when
909
# readonly files are encountered
910
for root, dirs, files in os.walk(self.base, topdown=False):
912
os.chmod(os.path.join(root, name), 0700)
913
shutil.rmtree(self.base)
917
######################################################################
921
def is_control_file(filename):
922
## FIXME: better check
923
filename = os.path.normpath(filename)
924
while filename != '':
925
head, tail = os.path.split(filename)
926
## mutter('check %r for control file' % ((head, tail), ))
927
if tail == bzrlib.BZRDIR:
936
def _gen_revision_id(when):
937
"""Return new revision-id."""
938
s = '%s-%s-' % (user_email(), compact_date(when))
939
s += hexlify(rand_bytes(12))
943
def gen_file_id(name):
944
"""Return new file id.
946
This should probably generate proper UUIDs, but for the moment we
947
cope with just randomness because running uuidgen every time is
949
idx = name.rfind('/')
951
name = name[idx+1 : ]
953
name = name.lstrip('.')
955
s = hexlify(rand_bytes(12))
956
return '-'.join((name, compact_date(time.time()), s))