/brz/remove-bazaar : revision 4596.1.5

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: Martin Pool
Date: 2009-08-28 04:13:16 UTC
mfrom: (4634.6.8 2.0)
mto: This revision was merged to the branch mainline in revision 4660.
Revision ID: mbp@sourcefrog.net-20090828041316-adcbxfnfpc4bjtpl

Merge 2.0 back to trunk

files added:
bzrlib/crash.py

bzrlib/tests/features.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_lock.py

doc/developers/apport.txt

doc/developers/content-filtering.txt

files renamed:
bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

files modified:
NEWS

bzrlib/__init__.py

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/debug-flags.txt

bzrlib/index.py

bzrlib/inventory.py

bzrlib/inventory_delta.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/missing.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/shelf.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_xml.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/xml5.py

doc/developers/bug-handling.txt

doc/developers/cycle.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/releasing.txt

doc/en/developer-guide/HACKING.txt

doc/index.txt

setup.py

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

gpg,

graph,

inventory,

inventory_delta,

lazy_regex,

lockable_files,

lockdir,

809

810

seen_root = True

810

811

self.new_inventory = None

811

812

if len(inv_delta):

813

# This should perhaps be guarded by a check that the basis we

814

# commit against is the basis for the commit and if not do a delta

815

# against the basis.

812

816

self._any_changes = True

813

817

if not seen_root:

814

818

# housekeeping root entry changes do not affect no-change commits.

927

931

"""

928

932

if self._write_group is not self.get_transaction():

929

933

# has an unlock or relock occurred?

934

if suppress_errors:

935

mutter(

936

'(suppressed) mismatched lock context and write group. %r, %r',

937

self._write_group, self.get_transaction())

938

return

930

939

raise errors.BzrError(

931

940

'mismatched lock context and write group. %r, %r' %

932

941

(self._write_group, self.get_transaction()))

1066

1075

check_content=True):

1067

1076

"""Store lines in inv_vf and return the sha1 of the inventory."""

1068

1077

parents = [(parent,) for parent in parents]

1069

return self.inventories.add_lines((revision_id,), parents, lines,

1078

result = self.inventories.add_lines((revision_id,), parents, lines,

1070

1079

check_content=check_content)[0]

1080

self.inventories._access.flush()

1081

return result

1071

1082

1072

1083

def add_revision(self, revision_id, rev, inv=None, config=None):

1073

1084

"""Add rev to the revision store as revision_id.

1532

1543

"""Commit the contents accrued within the current write group.

1533

1544

1534

1545

:seealso: start_write_group.

1546

1547

:return: it may return an opaque hint that can be passed to 'pack'.

1535

1548

"""

1536

1549

if self._write_group is not self.get_transaction():

1537

1550

# has an unlock or relock occurred?

1698

1711

:param revprops: Optional dictionary of revision properties.

1699

1712

:param revision_id: Optional revision id.

1700

1713

"""

1714

if self._fallback_repositories:

1715

raise errors.BzrError("Cannot commit from a lightweight checkout "

1716

"to a stacked branch. See "

1717

"https://bugs.launchpad.net/bzr/+bug/375013 for details.")

1701

1718

result = self._commit_builder_class(self, parents, config,

1702

1719

timestamp, timezone, committer, revprops, revision_id)

1703

1720

self.start_write_group()

2339

2356

"""Get Inventory object by revision id."""

2340

2357

return self.iter_inventories([revision_id]).next()

2341

2358

2342

def iter_inventories(self, revision_ids):

2359

def iter_inventories(self, revision_ids, ordering=None):

2343

2360

"""Get many inventories by revision_ids.

2344

2361

2345

2362

This will buffer some or all of the texts used in constructing the

2347

2364

time.

2348

2365

2349

2366

:param revision_ids: The expected revision ids of the inventories.

2367

:param ordering: optional ordering, e.g. 'topological'. If not

2368

specified, the order of revision_ids will be preserved (by

2369

buffering if necessary).

2350

2370

:return: An iterator of inventories.

2351

2371

"""

2352

2372

if ((None in revision_ids)

2353

2373

or (_mod_revision.NULL_REVISION in revision_ids)):

2354

2374

raise ValueError('cannot get null revision inventory')

2355

return self._iter_inventories(revision_ids)

2375

return self._iter_inventories(revision_ids, ordering)

2356

2376

2357

def _iter_inventories(self, revision_ids):

2377

def _iter_inventories(self, revision_ids, ordering):

2358

2378

"""single-document based inventory iteration."""

2359

for text, revision_id in self._iter_inventory_xmls(revision_ids):

2379

inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)

2380

for text, revision_id in inv_xmls:

2360

2381

yield self.deserialise_inventory(revision_id, text)

2361

2382

2362

def _iter_inventory_xmls(self, revision_ids):

2383

def _iter_inventory_xmls(self, revision_ids, ordering):

2384

if ordering is None:

2385

order_as_requested = True

2386

ordering = 'unordered'

2387

else:

2388

order_as_requested = False

2363

2389

keys = [(revision_id,) for revision_id in revision_ids]

2364

stream = self.inventories.get_record_stream(keys, 'unordered', True)

2390

if not keys:

2391

return

2392

if order_as_requested:

2393

key_iter = iter(keys)

2394

next_key = key_iter.next()

2395

stream = self.inventories.get_record_stream(keys, ordering, True)

2365

2396

text_chunks = {}

2366

2397

for record in stream:

2367

2398

if record.storage_kind != 'absent':

2368

text_chunks[record.key] = record.get_bytes_as('chunked')

2399

chunks = record.get_bytes_as('chunked')

2400

if order_as_requested:

2401

text_chunks[record.key] = chunks

2402

else:

2403

yield ''.join(chunks), record.key[-1]

2369

2404

else:

2370

2405

raise errors.NoSuchRevision(self, record.key)

2371

for key in keys:

2372

chunks = text_chunks.pop(key)

2373

yield ''.join(chunks), key[-1]

2406

if order_as_requested:

2407

# Yield as many results as we can while preserving order.

2408

while next_key in text_chunks:

2409

chunks = text_chunks.pop(next_key)

2410

yield ''.join(chunks), next_key[-1]

2411

try:

2412

next_key = key_iter.next()

2413

except StopIteration:

2414

# We still want to fully consume the get_record_stream,

2415

# just in case it is not actually finished at this point

2416

next_key = None

2417

break

2374

2418

2375

2419

def deserialise_inventory(self, revision_id, xml):

2376

2420

"""Transform the xml into an inventory object.

2397

2441

@needs_read_lock

2398

2442

def get_inventory_xml(self, revision_id):

2399

2443

"""Get inventory XML as a file object."""

2400

texts = self._iter_inventory_xmls([revision_id])

2444

texts = self._iter_inventory_xmls([revision_id], 'unordered')

2401

2445

try:

2402

2446

text, revision_id = texts.next()

2403

2447

except StopIteration:

3019

3063

# help), and for fetching when data won't have come from the same

3020

3064

# compressor.

3021

3065

pack_compresses = False

3066

# Does the repository inventory storage understand references to trees?

3067

supports_tree_reference = None

3022

3068

3023

3069

def __str__(self):

3024

3070

return "<%s>" % self.__class__.__name__

3128

3174

raise NotImplementedError(self.network_name)

3129

3175

3130

3176

def check_conversion_target(self, target_format):

3131

raise NotImplementedError(self.check_conversion_target)

3177

if self.rich_root_data and not target_format.rich_root_data:

3178

raise errors.BadConversionTarget(

3179

'Does not support rich root data.', target_format,

3180

from_format=self)

3181

if (self.supports_tree_reference and

3182

not getattr(target_format, 'supports_tree_reference', False)):

3183

raise errors.BadConversionTarget(

3184

'Does not support nested trees', target_format,

3185

from_format=self)

3132

3186

3133

3187

def open(self, a_bzrdir, _found=False):

3134

3188

"""Return an instance of this format for the bzrdir a_bzrdir.

3663

3717

# This is redundant with format.check_conversion_target(), however that

3664

3718

# raises an exception, and we just want to say "False" as in we won't

3665

3719

# support converting between these formats.

3720

if 'IDS_never' in debug.debug_flags:

3721

return False

3666

3722

if source.supports_rich_root() and not target.supports_rich_root():

3667

3723

return False

3668

3724

if (source._format.supports_tree_reference

3669

3725

and not target._format.supports_tree_reference):

3670

3726

return False

3727

if target._fallback_repositories and target._format.supports_chks:

3728

# IDS doesn't know how to copy CHKs for the parent inventories it

3729

# adds to stacked repos.

3730

return False

3731

if 'IDS_always' in debug.debug_flags:

3732

return True

3733

# Only use this code path for local source and target. IDS does far

3734

# too much IO (both bandwidth and roundtrips) over a network.

3735

if not source.bzrdir.transport.base.startswith('file:///'):

3736

return False

3737

if not target.bzrdir.transport.base.startswith('file:///'):

3738

return False

3671

3739

return True

3672

3740

3673

def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):

3741

def _get_trees(self, revision_ids, cache):

3742

possible_trees = []

3743

for rev_id in revision_ids:

3744

if rev_id in cache:

3745

possible_trees.append((rev_id, cache[rev_id]))

3746

else:

3747

# Not cached, but inventory might be present anyway.

3748

try:

3749

tree = self.source.revision_tree(rev_id)

3750

except errors.NoSuchRevision:

3751

# Nope, parent is ghost.

3752

pass

3753

else:

3754

cache[rev_id] = tree

3755

possible_trees.append((rev_id, tree))

3756

return possible_trees

3757

3758

def _get_delta_for_revision(self, tree, parent_ids, possible_trees):

3674

3759

"""Get the best delta and base for this revision.

3675

3760

3676

3761

:return: (basis_id, delta)

3677

3762

"""

3678

possible_trees = [(parent_id, cache[parent_id])

3679

for parent_id in parent_ids

3680

if parent_id in cache]

3681

if len(possible_trees) == 0:

3682

# There either aren't any parents, or the parents aren't in the

3683

# cache, so just use the last converted tree

3684

possible_trees.append((basis_id, cache[basis_id]))

3685

3763

deltas = []

3764

# Generate deltas against each tree, to find the shortest.

3765

texts_possibly_new_in_tree = set()

3686

3766

for basis_id, basis_tree in possible_trees:

3687

3767

delta = tree.inventory._make_delta(basis_tree.inventory)

3768

for old_path, new_path, file_id, new_entry in delta:

3769

if new_path is None:

3770

# This file_id isn't present in the new rev, so we don't

3771

# care about it.

3772

continue

3773

if not new_path:

3774

# Rich roots are handled elsewhere...

3775

continue

3776

kind = new_entry.kind

3777

if kind != 'directory' and kind != 'file':

3778

# No text record associated with this inventory entry.

3779

continue

3780

# This is a directory or file that has changed somehow.

3781

texts_possibly_new_in_tree.add((file_id, new_entry.revision))

3688

3782

deltas.append((len(delta), basis_id, delta))

3689

3783

deltas.sort()

3690

3784

return deltas[0][1:]

3691

3785

3692

def _get_parent_keys(self, root_key, parent_map):

3693

"""Get the parent keys for a given root id."""

3694

root_id, rev_id = root_key

3695

# Include direct parents of the revision, but only if they used

3696

# the same root_id and are heads.

3697

parent_keys = []

3698

for parent_id in parent_map[rev_id]:

3699

if parent_id == _mod_revision.NULL_REVISION:

3700

continue

3701

if parent_id not in self._revision_id_to_root_id:

3702

# We probably didn't read this revision, go spend the

3703

# extra effort to actually check

3704

try:

3705

tree = self.source.revision_tree(parent_id)

3706

except errors.NoSuchRevision:

3707

# Ghost, fill out _revision_id_to_root_id in case we

3708

# encounter this again.

3709

# But set parent_root_id to None since we don't really know

3710

parent_root_id = None

3711

else:

3712

parent_root_id = tree.get_root_id()

3713

self._revision_id_to_root_id[parent_id] = None

3714

else:

3715

parent_root_id = self._revision_id_to_root_id[parent_id]

3716

if root_id == parent_root_id:

3717

# With stacking we _might_ want to refer to a non-local

3718

# revision, but this code path only applies when we have the

3719

# full content available, so ghosts really are ghosts, not just

3720

# the edge of local data.

3721

parent_keys.append((parent_id,))

3722

else:

3723

# root_id may be in the parent anyway.

3724

try:

3725

tree = self.source.revision_tree(parent_id)

3726

except errors.NoSuchRevision:

3727

# ghost, can't refer to it.

3728

pass

3729

else:

3730

try:

3731

parent_keys.append((tree.inventory[root_id].revision,))

3732

except errors.NoSuchId:

3733

# not in the tree

3734

pass

3735

g = graph.Graph(self.source.revisions)

3736

heads = g.heads(parent_keys)

3737

selected_keys = []

3738

for key in parent_keys:

3739

if key in heads and key not in selected_keys:

3740

selected_keys.append(key)

3741

return tuple([(root_id,)+ key for key in selected_keys])

3786

def _fetch_parent_invs_for_stacking(self, parent_map, cache):

3787

"""Find all parent revisions that are absent, but for which the

3788

inventory is present, and copy those inventories.

3742

3789

3743

def _new_root_data_stream(self, root_keys_to_create, parent_map):

3744

for root_key in root_keys_to_create:

3745

parent_keys = self._get_parent_keys(root_key, parent_map)

3746

yield versionedfile.FulltextContentFactory(root_key,

3747

parent_keys, None, '')

3790

This is necessary to preserve correctness when the source is stacked

3791

without fallbacks configured. (Note that in cases like upgrade the

3792

source may not have _fallback_repositories even though it is

3793

stacked.)

3794

"""

3795

parent_revs = set()

3796

for parents in parent_map.values():

3797

parent_revs.update(parents)

3798

present_parents = self.source.get_parent_map(parent_revs)

3799

absent_parents = set(parent_revs).difference(present_parents)

3800

parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(

3801

(rev_id,) for rev_id in absent_parents)

3802

parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]

3803

for parent_tree in self.source.revision_trees(parent_inv_ids):

3804

current_revision_id = parent_tree.get_revision_id()

3805

parents_parents_keys = parent_invs_keys_for_stacking[

3806

(current_revision_id,)]

3807

parents_parents = [key[-1] for key in parents_parents_keys]

3808

basis_id = _mod_revision.NULL_REVISION

3809

basis_tree = self.source.revision_tree(basis_id)

3810

delta = parent_tree.inventory._make_delta(basis_tree.inventory)

3811

self.target.add_inventory_by_delta(

3812

basis_id, delta, current_revision_id, parents_parents)

3813

cache[current_revision_id] = parent_tree

3748

3814

3749

3815

def _fetch_batch(self, revision_ids, basis_id, cache):

3750

3816

"""Fetch across a few revisions.

3764

3830

pending_deltas = []

3765

3831

pending_revisions = []

3766

3832

parent_map = self.source.get_parent_map(revision_ids)

3833

self._fetch_parent_invs_for_stacking(parent_map, cache)

3767

3834

for tree in self.source.revision_trees(revision_ids):

3835

# Find an inventory delta for this revision.

3836

# Find text entries that need to be copied, too.

3768

3837

current_revision_id = tree.get_revision_id()

3769

3838

parent_ids = parent_map.get(current_revision_id, ())

3839

parent_trees = self._get_trees(parent_ids, cache)

3840

possible_trees = list(parent_trees)

3841

if len(possible_trees) == 0:

3842

# There either aren't any parents, or the parents are ghosts,

3843

# so just use the last converted tree.

3844

possible_trees.append((basis_id, cache[basis_id]))

3770

3845

basis_id, delta = self._get_delta_for_revision(tree, parent_ids,

3771

basis_id, cache)

3846

possible_trees)

3772

3847

if self._converting_to_rich_root:

3773

3848

self._revision_id_to_root_id[current_revision_id] = \

3774

3849

tree.get_root_id()

3775

# Find text entries that need to be copied

3850

# Determine which texts are present in this revision but not in

3851

# any of the available parents.

3852

texts_possibly_new_in_tree = set()

3776

3853

for old_path, new_path, file_id, entry in delta:

3777

if new_path is not None:

3778

if not new_path:

3779

# This is the root

3780

if not self.target.supports_rich_root():

3781

# The target doesn't support rich root, so we don't

3782

# copy

3783

continue

3784

if self._converting_to_rich_root:

3785

# This can't be copied normally, we have to insert

3786

# it specially

3787

root_keys_to_create.add((file_id, entry.revision))

3788

continue

3789

text_keys.add((file_id, entry.revision))

3854

if new_path is None:

3855

# This file_id isn't present in the new rev

3856

continue

3857

if not new_path:

3858

# This is the root

3859

if not self.target.supports_rich_root():

3860

# The target doesn't support rich root, so we don't

3861

# copy

3862

continue

3863

if self._converting_to_rich_root:

3864

# This can't be copied normally, we have to insert

3865

# it specially

3866

root_keys_to_create.add((file_id, entry.revision))

3867

continue

3868

kind = entry.kind

3869

texts_possibly_new_in_tree.add((file_id, entry.revision))

3870

for basis_id, basis_tree in possible_trees:

3871

basis_inv = basis_tree.inventory

3872

for file_key in list(texts_possibly_new_in_tree):

3873

file_id, file_revision = file_key

3874

try:

3875

entry = basis_inv[file_id]

3876

except errors.NoSuchId:

3877

continue

3878

if entry.revision == file_revision:

3879

texts_possibly_new_in_tree.remove(file_key)

3880

text_keys.update(texts_possibly_new_in_tree)

3790

3881

revision = self.source.get_revision(current_revision_id)

3791

3882

pending_deltas.append((basis_id, delta,

3792

3883

current_revision_id, revision.parent_ids))

3797

3888

from_texts = self.source.texts

3798

3889

to_texts = self.target.texts

3799

3890

if root_keys_to_create:

3800

root_stream = self._new_root_data_stream(root_keys_to_create,

3801

parent_map)

3891

from bzrlib.fetch import _new_root_data_stream

3892

root_stream = _new_root_data_stream(

3893

root_keys_to_create, self._revision_id_to_root_id, parent_map,

3894

self.source)

3802

3895

to_texts.insert_record_stream(root_stream)

3803

3896

to_texts.insert_record_stream(from_texts.get_record_stream(

3804

3897

text_keys, self.target._format._fetch_order,

3811

3904

# for the new revisions that we are about to insert. We do this

3812

3905

# before adding the revisions so that no revision is added until

3813

3906

# all the inventories it may depend on are added.

3907

# Note that this is overzealous, as we may have fetched these in an

3908

# earlier batch.

3814

3909

parent_ids = set()

3815

3910

revision_ids = set()

3816

3911

for revision in pending_revisions:

3819

3914

parent_ids.difference_update(revision_ids)

3820

3915

parent_ids.discard(_mod_revision.NULL_REVISION)

3821

3916

parent_map = self.source.get_parent_map(parent_ids)

3822

for parent_tree in self.source.revision_trees(parent_ids):

3823

basis_id, delta = self._get_delta_for_revision(tree, parent_ids, basis_id, cache)

3917

# we iterate over parent_map and not parent_ids because we don't

3918

# want to try copying any revision which is a ghost

3919

for parent_tree in self.source.revision_trees(parent_map):

3824

3920

current_revision_id = parent_tree.get_revision_id()

3825

3921

parents_parents = parent_map[current_revision_id]

3922

possible_trees = self._get_trees(parents_parents, cache)

3923

if len(possible_trees) == 0:

3924

# There either aren't any parents, or the parents are

3925

# ghosts, so just use the last converted tree.

3926

possible_trees.append((basis_id, cache[basis_id]))

3927

basis_id, delta = self._get_delta_for_revision(parent_tree,

3928

parents_parents, possible_trees)

3826

3929

self.target.add_inventory_by_delta(

3827

3930

basis_id, delta, current_revision_id, parents_parents)

3828

3931

# insert signatures and revisions

3893

3996

# Walk through all revisions; get inventory deltas, copy referenced

3894

3997

# texts that delta references, insert the delta, revision and

3895

3998

# signature.

3896

first_rev = self.source.get_revision(revision_ids[0])

3897

3999

if pb is None:

3898

4000

my_pb = ui.ui_factory.nested_progress_bar()

3899

4001

pb = my_pb

4065

4167

self.file_ids = set([file_id for file_id, _ in

4066

4168

self.text_index.iterkeys()])

4067

4169

# text keys is now grouped by file_id

4068

n_weaves = len(self.file_ids)

4069

files_in_revisions = {}

4070

revisions_of_files = {}

4071

4170

n_versions = len(self.text_index)

4072

4171

progress_bar.update('loading text store', 0, n_versions)

4073

4172

parent_map = self.repository.texts.get_parent_map(self.text_index)

4166

4265

else:

4167

4266

new_pack.set_write_cache_size(1024*1024)

4168

4267

for substream_type, substream in stream:

4268

if 'stream' in debug.debug_flags:

4269

mutter('inserting substream: %s', substream_type)

4169

4270

if substream_type == 'texts':

4170

4271

self.target_repo.texts.insert_record_stream(substream)

4171

4272

elif substream_type == 'inventories':

4175

4276

else:

4176

4277

self._extract_and_insert_inventories(

4177

4278

substream, src_serializer)

4279

elif substream_type == 'inventory-deltas':

4280

self._extract_and_insert_inventory_deltas(

4281

substream, src_serializer)

4178

4282

elif substream_type == 'chk_bytes':

4179

4283

# XXX: This doesn't support conversions, as it assumes the

4180

4284

# conversion was done in the fetch code.

4231

4335

self.target_repo.pack(hint=hint)

4232

4336

return [], set()

4233

4337

4234

def _extract_and_insert_inventories(self, substream, serializer):

4338

def _extract_and_insert_inventory_deltas(self, substream, serializer):

4339

target_rich_root = self.target_repo._format.rich_root_data

4340

target_tree_refs = self.target_repo._format.supports_tree_reference

4341

for record in substream:

4342

# Insert the delta directly

4343

inventory_delta_bytes = record.get_bytes_as('fulltext')

4344

deserialiser = inventory_delta.InventoryDeltaDeserializer()

4345

try:

4346

parse_result = deserialiser.parse_text_bytes(

4347

inventory_delta_bytes)

4348

except inventory_delta.IncompatibleInventoryDelta, err:

4349

trace.mutter("Incompatible delta: %s", err.msg)

4350

raise errors.IncompatibleRevision(self.target_repo._format)

4351

basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result

4352

revision_id = new_id

4353

parents = [key[0] for key in record.parents]

4354

self.target_repo.add_inventory_by_delta(

4355

basis_id, inv_delta, revision_id, parents)

4356

4357

def _extract_and_insert_inventories(self, substream, serializer,

4358

parse_delta=None):

4235

4359

"""Generate a new inventory versionedfile in target, converting data.

4236

4360

4237

4361

The inventory is retrieved from the source, (deserializing it), and

4238

4362

stored in the target (reserializing it in a different format).

4239

4363

"""

4364

target_rich_root = self.target_repo._format.rich_root_data

4365

target_tree_refs = self.target_repo._format.supports_tree_reference

4240

4366

for record in substream:

4367

# It's not a delta, so it must be a fulltext in the source

4368

# serializer's format.

4241

4369

bytes = record.get_bytes_as('fulltext')

4242

4370

revision_id = record.key[0]

4243

4371

inv = serializer.read_inventory_from_string(bytes, revision_id)

4244

4372

parents = [key[0] for key in record.parents]

4245

4373

self.target_repo.add_inventory(revision_id, inv, parents)

4374

# No need to keep holding this full inv in memory when the rest of

4375

# the substream is likely to be all deltas.

4376

del inv

4246

4377

4247

4378

def _extract_and_insert_revisions(self, substream, serializer):

4248

4379

for record in substream:

4297

4428

return [('signatures', signatures), ('revisions', revisions)]

4298

4429

4299

4430

def _generate_root_texts(self, revs):

4300

"""This will be called by __fetch between fetching weave texts and

4431

"""This will be called by get_stream between fetching weave texts and

4301

4432

fetching the inventory weave.

4302

4303

Subclasses should override this if they need to generate root texts

4304

after fetching weave texts.

4305

4433

"""

4306

4434

if self._rich_root_upgrade():

4307

4435

import bzrlib.fetch

4314

4442

phase = 'file'

4315

4443

revs = search.get_keys()

4316

4444

graph = self.from_repository.get_graph()

4317

revs = list(graph.iter_topo_order(revs))

4445

revs = tsort.topo_sort(graph.get_parent_map(revs))

4318

4446

data_to_fetch = self.from_repository.item_keys_introduced_by(revs)

4319

4447

text_keys = []

4320

4448

for knit_kind, file_id, revisions in data_to_fetch:

4339

4467

# will be valid.

4340

4468

for _ in self._generate_root_texts(revs):

4341

4469

yield _

4342

# NB: This currently reopens the inventory weave in source;

4343

# using a single stream interface instead would avoid this.

4344

from_weave = self.from_repository.inventories

4345

4470

# we fetch only the referenced inventories because we do not

4346

4471

# know for unselected inventories whether all their required

4347

4472

# texts are present in the other repository - it could be

4386

4511

if not keys:

4387

4512

# No need to stream something we don't have

4388

4513

continue

4514

if substream_kind == 'inventories':

4515

# Some missing keys are genuinely ghosts, filter those out.

4516

present = self.from_repository.inventories.get_parent_map(keys)

4517

revs = [key[0] for key in present]

4518

# Get the inventory stream more-or-less as we do for the

4519

# original stream; there's no reason to assume that records

4520

# direct from the source will be suitable for the sink. (Think

4521

# e.g. 2a -> 1.9-rich-root).

4522

for info in self._get_inventory_stream(revs, missing=True):

4523

yield info

4524

continue

4525

4389

4526

# Ask for full texts always so that we don't need more round trips

4390

4527

# after this stream.

4391

4528

# Some of the missing keys are genuinely ghosts, so filter absent

4406

4543

return (not self.from_repository._format.rich_root_data and

4407

4544

self.to_format.rich_root_data)

4408

4545

4409

def _get_inventory_stream(self, revision_ids):

4546

def _get_inventory_stream(self, revision_ids, missing=False):

4410

4547

from_format = self.from_repository._format

4411

if (from_format.supports_chks and self.to_format.supports_chks

4412

and (from_format._serializer == self.to_format._serializer)):

4413

# Both sides support chks, and they use the same serializer, so it

4414

# is safe to transmit the chk pages and inventory pages across

4415

# as-is.

4416

return self._get_chk_inventory_stream(revision_ids)

4417

elif (not from_format.supports_chks):

4418

# Source repository doesn't support chks. So we can transmit the

4419

# inventories 'as-is' and either they are just accepted on the

4420

# target, or the Sink will properly convert it.

4421

return self._get_simple_inventory_stream(revision_ids)

4548

if (from_format.supports_chks and self.to_format.supports_chks and

4549

from_format.network_name() == self.to_format.network_name()):

4550

raise AssertionError(

4551

"this case should be handled by GroupCHKStreamSource")

4552

elif 'forceinvdeltas' in debug.debug_flags:

4553

return self._get_convertable_inventory_stream(revision_ids,

4554

delta_versus_null=missing)

4555

elif from_format.network_name() == self.to_format.network_name():

4556

# Same format.

4557

return self._get_simple_inventory_stream(revision_ids,

4558

missing=missing)

4559

elif (not from_format.supports_chks and not self.to_format.supports_chks

4560

and from_format._serializer == self.to_format._serializer):

4561

# Essentially the same format.

4562

return self._get_simple_inventory_stream(revision_ids,

4563

missing=missing)

4422

4564

else:

4423

# XXX: Hack to make not-chk->chk fetch: copy the inventories as

4424

# inventories. Note that this should probably be done somehow

4425

# as part of bzrlib.repository.StreamSink. Except JAM couldn't

4426

# figure out how a non-chk repository could possibly handle

4427

# deserializing an inventory stream from a chk repo, as it

4428

# doesn't have a way to understand individual pages.

4429

return self._get_convertable_inventory_stream(revision_ids)

4565

# Any time we switch serializations, we want to use an

4566

# inventory-delta based approach.

4567

return self._get_convertable_inventory_stream(revision_ids,

4568

delta_versus_null=missing)

4430

4569

4431

def _get_simple_inventory_stream(self, revision_ids):

4570

def _get_simple_inventory_stream(self, revision_ids, missing=False):

4571

# NB: This currently reopens the inventory weave in source;

4572

# using a single stream interface instead would avoid this.

4432

4573

from_weave = self.from_repository.inventories

4574

if missing:

4575

delta_closure = True

4576

else:

4577

delta_closure = not self.delta_on_metadata()

4433

4578

yield ('inventories', from_weave.get_record_stream(

4434

4579

[(rev_id,) for rev_id in revision_ids],

4435

self.inventory_fetch_order(),

4436

not self.delta_on_metadata()))

4437

4438

def _get_chk_inventory_stream(self, revision_ids):
    """Stream the CHK inventories for revision_ids, then their chk pages.

    This is a generator of two ``(substream_name, record_iterator)`` pairs:
    first ``'inventories'``, then ``'chk_bytes'``.  The chk root keys are
    collected while the 'inventories' iterator is being consumed, so that
    substream is expected to be consumed before 'chk_bytes' is iterated.

    :param revision_ids: The revision ids whose inventories are wanted.
    """
    source = self.from_repository
    # We want one inventory from outside the search set, so that
    # uninteresting chk pages can be filtered out. For now
    # _find_revision_outside_set supplies it, but if we had a Search with
    # cut_revs, that could be used instead.
    basis_rev_id = source._find_revision_outside_set(revision_ids)
    basis_key = (basis_rev_id,)
    wanted_keys = [(rev_id,) for rev_id in revision_ids]
    if basis_rev_id != _mod_revision.NULL_REVISION:
        wanted_keys.append(basis_key)
    # Any repo that supports chk_bytes must also support out-of-order
    # insertion. At least, that is how we expect it to work.
    # get_record_stream is used instead of iter_inventories because we want
    # to be able to insert the stream as well. We could instead fetch
    # allowing deltas and then iter_inventories, but we don't know whether
    # source or target is more 'local' anyway.
    # The final True asks for fulltexts, which are needed so that each
    # inventory's references can be found.
    inv_records = source.inventories.get_record_stream(
        wanted_keys, 'unordered', True)
    basis_roots = set()
    wanted_roots = set()

    def note_roots(chk_inv, root_keys):
        # Add the root key of each chk map of chk_inv to root_keys; the
        # parent_id_basename_to_file_id map is skipped when it is None.
        root_keys.add(chk_inv.id_to_entry.key())
        p_id_map = chk_inv.parent_id_basename_to_file_id
        if p_id_map is not None:
            root_keys.add(p_id_map.key())

    def filter_inv_stream(records):
        # Yield every record except the basis inventory, collecting the chk
        # root keys of all of them (basis included) along the way.
        for record in records:
            fulltext = record.get_bytes_as('fulltext')
            chk_inv = inventory.CHKInventory.deserialise(
                source.chk_bytes, fulltext, record.key)
            if record.key != basis_key:
                yield record
                note_roots(chk_inv, wanted_roots)
            else:
                note_roots(chk_inv, basis_roots)

    yield ('inventories', filter_inv_stream(inv_records))
    # Now that all of the interesting root nodes have been worked out, grab
    # all of the interesting pages so they can be inserted.
    interesting = chk_map.iter_interesting_nodes(
        source.chk_bytes, wanted_roots, basis_roots)

    def to_stream_adapter():
        """Reduce the iter_interesting_nodes result to a single stream.

        iter_interesting_nodes returns records as it processes them, along
        with keys. Only the records themselves (when not None) are passed
        on.
        """
        for record, _items in interesting:
            if record is not None:
                yield record

    # XXX: We could instead call get_record_stream(records.keys())
    #      ATM, this will always insert the records as fulltexts, and
    #      requires that you can hang on to records once you have gone
    #      on to the next one. Further, it causes the target to
    #      recompress the data. Testing shows it to be faster than
    #      requesting the records again, though.
    yield ('chk_bytes', to_stream_adapter())

4503

4504

def _get_convertable_inventory_stream(self, revision_ids):

4505

# XXX: One of source or target is using chks, and they don't have

4506

# compatible serializations. The StreamSink code expects to be

4507

# able to convert on the target, so we need to put

4508

# bytes-on-the-wire that can be converted

4509

yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))

4510

4511

def _stream_invs_as_fulltexts(self, revision_ids):

4580

self.inventory_fetch_order(), delta_closure))

4581

4582

def _get_convertable_inventory_stream(self, revision_ids,

4583

delta_versus_null=False):

4584

# The source is using CHKs, but the target either doesn't or it has a

4585

# different serializer. The StreamSink code expects to be able to

4586

# convert on the target, so we need to put bytes-on-the-wire that can

4587

# be converted. That means inventory deltas (if the remote is <1.19,

4588

# RemoteStreamSink will fallback to VFS to insert the deltas).

4589

yield ('inventory-deltas',

4590

self._stream_invs_as_deltas(revision_ids,

4591

delta_versus_null=delta_versus_null))

4592

4593

def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):

4594

"""Return a stream of inventory-deltas for the given rev ids.

4595

4596

:param revision_ids: The list of inventories to transmit

4597

:param delta_versus_null: Don't try to find a minimal delta for this

4598

entry, instead compute the delta versus the NULL_REVISION. This

4599

effectively streams a complete inventory. Used for stuff like

4600

filling in missing parents, etc.

4601

"""

4512

4602

from_repo = self.from_repository

4513

from_serializer = from_repo._format._serializer

4514

4603

revision_keys = [(rev_id,) for rev_id in revision_ids]

4515

4604

parent_map = from_repo.inventories.get_parent_map(revision_keys)

4516

for inv in self.from_repository.iter_inventories(revision_ids):

4517

# XXX: This is a bit hackish, but it works. Basically,

4518

# CHKSerializer 'accidentally' supports

4519

# read/write_inventory_to_string, even though that is never

4520

# the format that is stored on disk. It *does* give us a

4521

# single string representation for an inventory, so live with

4522

# it for now.

4523

# This would be far better if we had a 'serialized inventory

4524

# delta' form. Then we could use 'inventory._make_delta', and

4525

# transmit that. This would both be faster to generate, and

4526

# result in fewer bytes-on-the-wire.

4527

as_bytes = from_serializer.write_inventory_to_string(inv)

4605

# XXX: possibly repos could implement a more efficient iter_inv_deltas

4606

# method...

4607

inventories = self.from_repository.iter_inventories(

4608

revision_ids, 'topological')

4609

format = from_repo._format

4610

invs_sent_so_far = set([_mod_revision.NULL_REVISION])

4611

inventory_cache = lru_cache.LRUCache(50)

4612

null_inventory = from_repo.revision_tree(

4613

_mod_revision.NULL_REVISION).inventory

4614

# XXX: ideally the rich-root/tree-refs flags would be per-revision, not

4615

# per-repo (e.g. streaming a non-rich-root revision out of a rich-root

4616

# repo back into a non-rich-root repo ought to be allowed)

4617

serializer = inventory_delta.InventoryDeltaSerializer(

4618

versioned_root=format.rich_root_data,

4619

tree_references=format.supports_tree_reference)

4620

for inv in inventories:

4528

4621

key = (inv.revision_id,)

4529

4622

parent_keys = parent_map.get(key, ())

4623

delta = None

4624

if not delta_versus_null and parent_keys:

4625

# The caller did not ask for complete inventories and we have

4626

# some parents that we can delta against. Make a delta against

4627

# each parent so that we can find the smallest.

4628

parent_ids = [parent_key[0] for parent_key in parent_keys]

4629

for parent_id in parent_ids:

4630

if parent_id not in invs_sent_so_far:

4631

# We don't know that the remote side has this basis, so

4632

# we can't use it.

4633

continue

4634

if parent_id == _mod_revision.NULL_REVISION:

4635

parent_inv = null_inventory

4636

else:

4637

parent_inv = inventory_cache.get(parent_id, None)

4638

if parent_inv is None:

4639

parent_inv = from_repo.get_inventory(parent_id)

4640

candidate_delta = inv._make_delta(parent_inv)

4641

if (delta is None or

4642

len(delta) > len(candidate_delta)):

4643

delta = candidate_delta

4644

basis_id = parent_id

4645

if delta is None:

4646

# Either none of the parents ended up being suitable, or we

4647

# were asked to delta against NULL

4648

basis_id = _mod_revision.NULL_REVISION

4649

delta = inv._make_delta(null_inventory)

4650

invs_sent_so_far.add(inv.revision_id)

4651

inventory_cache[inv.revision_id] = inv

4652

delta_serialized = ''.join(

4653

serializer.delta_to_lines(basis_id, key[-1], delta))

4530

4654

yield versionedfile.FulltextContentFactory(

4531

key, parent_keys, None, as_bytes)

4655

key, parent_keys, None, delta_serialized)

4532

4656

4533

4657

4534

4658

def _iter_for_revno(repo, partial_history_cache, stop_index=None,

Older »