/brz/remove-bazaar : revision 4584.3.27

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: Martin Pool
Date: 2009-08-20 05:02:45 UTC
mfrom: (4615 +trunk)
mto: This revision was merged to the branch mainline in revision 4632.
Revision ID: mbp@sourcefrog.net-20090820050245-o7cw6nxrzh1eah8h

News for apport feature

files added:
bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/test_lock.py

doc/developers/check.txt

files renamed:
bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

files modified:
.bzrignore

Makefile

NEWS

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/counted_lock.py

bzrlib/crash.py

bzrlib/diff-delta.c

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/debug-flags.txt

bzrlib/index.py

bzrlib/inventory.py

bzrlib/inventory_delta.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/progress.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_status.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_xml.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/ui/text.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

doc/developers/cycle.txt

doc/developers/plugin-api.txt

doc/developers/releasing.txt

doc/en/developer-guide/HACKING.txt

doc/index.txt

setup.py

tools/win32/build_release.py

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

gpg,

graph,

inventory,

inventory_delta,

lazy_regex,

lockable_files,

lockdir,

924

925

"""

925

926

if self._write_group is not self.get_transaction():

926

927

# has an unlock or relock occurred?

928

if suppress_errors:

929

mutter(

930

'(suppressed) mismatched lock context and write group. %r, %r',

931

self._write_group, self.get_transaction())

932

return

927

933

raise errors.BzrError(

928

934

'mismatched lock context and write group. %r, %r' %

929

935

(self._write_group, self.get_transaction()))

1063

1069

check_content=True):

1064

1070

"""Store lines in inv_vf and return the sha1 of the inventory."""

1065

1071

parents = [(parent,) for parent in parents]

1066

return self.inventories.add_lines((revision_id,), parents, lines,

1072

result = self.inventories.add_lines((revision_id,), parents, lines,

1067

1073

check_content=check_content)[0]

1074

self.inventories._access.flush()

1075

return result

1068

1076

1069

1077

def add_revision(self, revision_id, rev, inv=None, config=None):

1070

1078

"""Add rev to the revision store as revision_id.

1146

1154

# The old API returned a list, should this actually be a set?

1147

1155

return parent_map.keys()

1148

1156

1157

def _check_inventories(self, checker):

1158

"""Check the inventories found from the revision scan.

1159

1160

This is responsible for verifying the sha1 of inventories and

1161

creating a pending_keys set that covers data referenced by inventories.

1162

"""

1163

bar = ui.ui_factory.nested_progress_bar()

1164

try:

1165

self._do_check_inventories(checker, bar)

1166

finally:

1167

bar.finished()

1168

1169

def _do_check_inventories(self, checker, bar):

1170

"""Helper for _check_inventories."""

1171

revno = 0

1172

keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}

1173

kinds = ['chk_bytes', 'texts']

1174

count = len(checker.pending_keys)

1175

bar.update("inventories", 0, 2)

1176

current_keys = checker.pending_keys

1177

checker.pending_keys = {}

1178

# Accumulate current checks.

1179

for key in current_keys:

1180

if key[0] != 'inventories' and key[0] not in kinds:

1181

checker._report_items.append('unknown key type %r' % (key,))

1182

keys[key[0]].add(key[1:])

1183

if keys['inventories']:

1184

# NB: output order *should* be roughly sorted - topo or

1185

# inverse topo depending on repository - either way decent

1186

# to just delta against. However, pre-CHK formats didn't

1187

# try to optimise inventory layout on disk. As such the

1188

# pre-CHK code path does not use inventory deltas.

1189

last_object = None

1190

for record in self.inventories.check(keys=keys['inventories']):

1191

if record.storage_kind == 'absent':

1192

checker._report_items.append(

1193

'Missing inventory {%s}' % (record.key,))

1194

else:

1195

last_object = self._check_record('inventories', record,

1196

checker, last_object,

1197

current_keys[('inventories',) + record.key])

1198

del keys['inventories']

1199

else:

1200

return

1201

bar.update("texts", 1)

1202

while (checker.pending_keys or keys['chk_bytes']

1203

or keys['texts']):

1204

# Something to check.

1205

current_keys = checker.pending_keys

1206

checker.pending_keys = {}

1207

# Accumulate current checks.

1208

for key in current_keys:

1209

if key[0] not in kinds:

1210

checker._report_items.append('unknown key type %r' % (key,))

1211

keys[key[0]].add(key[1:])

1212

# Check the outermost kind only - inventories || chk_bytes || texts

1213

for kind in kinds:

1214

if keys[kind]:

1215

last_object = None

1216

for record in getattr(self, kind).check(keys=keys[kind]):

1217

if record.storage_kind == 'absent':

1218

checker._report_items.append(

1219

'Missing inventory {%s}' % (record.key,))

1220

else:

1221

last_object = self._check_record(kind, record,

1222

checker, last_object, current_keys[(kind,) + record.key])

1223

keys[kind] = set()

1224

break

1225

1226

def _check_record(self, kind, record, checker, last_object, item_data):

1227

"""Check a single text from this repository."""

1228

if kind == 'inventories':

1229

rev_id = record.key[0]

1230

inv = self.deserialise_inventory(rev_id,

1231

record.get_bytes_as('fulltext'))

1232

if last_object is not None:

1233

delta = inv._make_delta(last_object)

1234

for old_path, path, file_id, ie in delta:

1235

if ie is None:

1236

continue

1237

ie.check(checker, rev_id, inv)

1238

else:

1239

for path, ie in inv.iter_entries():

1240

ie.check(checker, rev_id, inv)

1241

if self._format.fast_deltas:

1242

return inv

1243

elif kind == 'chk_bytes':

1244

# No code written to check chk_bytes for this repo format.

1245

checker._report_items.append(

1246

'unsupported key type chk_bytes for %s' % (record.key,))

1247

elif kind == 'texts':

1248

self._check_text(record, checker, item_data)

1249

else:

1250

checker._report_items.append(

1251

'unknown key type %s for %s' % (kind, record.key))

1252

1253

def _check_text(self, record, checker, item_data):

1254

"""Check a single text."""

1255

# Check it is extractable.

1256

# TODO: check length.

1257

if record.storage_kind == 'chunked':

1258

chunks = record.get_bytes_as(record.storage_kind)

1259

sha1 = osutils.sha_strings(chunks)

1260

length = sum(map(len, chunks))

1261

else:

1262

content = record.get_bytes_as('fulltext')

1263

sha1 = osutils.sha_string(content)

1264

length = len(content)

1265

if item_data and sha1 != item_data[1]:

1266

checker._report_items.append(

1267

'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %

1268

(record.key, sha1, item_data[1], item_data[2]))

1269

1149

1270

@staticmethod

1150

1271

def create(a_bzrdir):

1151

1272

"""Construct the current default format repository in a_bzrdir."""

1416

1537

"""Commit the contents accrued within the current write group.

1417

1538

1418

1539

:seealso: start_write_group.

1540

1541

:return: it may return an opaque hint that can be passed to 'pack'.

1419

1542

"""

1420

1543

if self._write_group is not self.get_transaction():

1421

1544

# has an unlock or relock occurred?

1582

1705

:param revprops: Optional dictionary of revision properties.

1583

1706

:param revision_id: Optional revision id.

1584

1707

"""

1708

if self._fallback_repositories:

1709

raise errors.BzrError("Cannot commit from a lightweight checkout "

1710

"to a stacked branch. See "

1711

"https://bugs.launchpad.net/bzr/+bug/375013 for details.")

1585

1712

result = self._commit_builder_class(self, parents, config,

1586

1713

timestamp, timezone, committer, revprops, revision_id)

1587

1714

self.start_write_group()

1714

1841

1715

1842

@needs_read_lock

1716

1843

def get_revisions(self, revision_ids):

1717

"""Get many revisions at once."""

1844

"""Get many revisions at once.

1845

1846

Repositories that need to check data on every revision read should

1847

subclass this method.

1848

"""

1718

1849

return self._get_revisions(revision_ids)

1719

1850

1720

1851

@needs_read_lock

1721

1852

def _get_revisions(self, revision_ids):

1722

1853

"""Core work logic to get many revisions without sanity checks."""

1723

for rev_id in revision_ids:

1724

if not rev_id or not isinstance(rev_id, basestring):

1725

raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)

1854

revs = {}

1855

for revid, rev in self._iter_revisions(revision_ids):

1856

if rev is None:

1857

raise errors.NoSuchRevision(self, revid)

1858

revs[revid] = rev

1859

return [revs[revid] for revid in revision_ids]

1860

1861

def _iter_revisions(self, revision_ids):

1862

"""Iterate over revision objects.

1863

1864

:param revision_ids: An iterable of revisions to examine. None may be

1865

passed to request all revisions known to the repository. Note that

1866

not all repositories can find unreferenced revisions; for those

1867

repositories only referenced ones will be returned.

1868

:return: An iterator of (revid, revision) tuples. Absent revisions (

1869

those asked for but not available) are returned as (revid, None).

1870

"""

1871

if revision_ids is None:

1872

revision_ids = self.all_revision_ids()

1873

else:

1874

for rev_id in revision_ids:

1875

if not rev_id or not isinstance(rev_id, basestring):

1876

raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)

1726

1877

keys = [(key,) for key in revision_ids]

1727

1878

stream = self.revisions.get_record_stream(keys, 'unordered', True)

1728

revs = {}

1729

1879

for record in stream:

1880

revid = record.key[0]

1730

1881

if record.storage_kind == 'absent':

1731

raise errors.NoSuchRevision(self, record.key[0])

1732

text = record.get_bytes_as('fulltext')

1733

rev = self._serializer.read_revision_from_string(text)

1734

revs[record.key[0]] = rev

1735

return [revs[revid] for revid in revision_ids]

1882

yield (revid, None)

1883

else:

1884

text = record.get_bytes_as('fulltext')

1885

rev = self._serializer.read_revision_from_string(text)

1886

yield (revid, rev)

1736

1887

1737

1888

@needs_read_lock

1738

1889

def get_revision_xml(self, revision_id):

2093

2244

batch_size]

2094

2245

if not to_query:

2095

2246

break

2096

for rev_tree in self.revision_trees(to_query):

2097

revision_id = rev_tree.get_revision_id()

2247

for revision_id in to_query:

2098

2248

parent_ids = ancestors[revision_id]

2099

2249

for text_key in revision_keys[revision_id]:

2100

2250

pb.update("Calculating text parents", processed_texts)

2200

2350

"""Get Inventory object by revision id."""

2201

2351

return self.iter_inventories([revision_id]).next()

2202

2352

2203

def iter_inventories(self, revision_ids):

2353

def iter_inventories(self, revision_ids, ordering=None):

2204

2354

"""Get many inventories by revision_ids.

2205

2355

2206

2356

This will buffer some or all of the texts used in constructing the

2208

2358

time.

2209

2359

2210

2360

:param revision_ids: The expected revision ids of the inventories.

2361

:param ordering: optional ordering, e.g. 'topological'. If not

2362

specified, the order of revision_ids will be preserved (by

2363

buffering if necessary).

2211

2364

:return: An iterator of inventories.

2212

2365

"""

2213

2366

if ((None in revision_ids)

2214

2367

or (_mod_revision.NULL_REVISION in revision_ids)):

2215

2368

raise ValueError('cannot get null revision inventory')

2216

return self._iter_inventories(revision_ids)

2369

return self._iter_inventories(revision_ids, ordering)

2217

2370

2218

def _iter_inventories(self, revision_ids):

2371

def _iter_inventories(self, revision_ids, ordering):

2219

2372

"""single-document based inventory iteration."""

2220

for text, revision_id in self._iter_inventory_xmls(revision_ids):

2373

inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)

2374

for text, revision_id in inv_xmls:

2221

2375

yield self.deserialise_inventory(revision_id, text)

2222

2376

2223

def _iter_inventory_xmls(self, revision_ids):

2377

def _iter_inventory_xmls(self, revision_ids, ordering):

2378

if ordering is None:

2379

order_as_requested = True

2380

ordering = 'unordered'

2381

else:

2382

order_as_requested = False

2224

2383

keys = [(revision_id,) for revision_id in revision_ids]

2225

stream = self.inventories.get_record_stream(keys, 'unordered', True)

2384

if not keys:

2385

return

2386

if order_as_requested:

2387

key_iter = iter(keys)

2388

next_key = key_iter.next()

2389

stream = self.inventories.get_record_stream(keys, ordering, True)

2226

2390

text_chunks = {}

2227

2391

for record in stream:

2228

2392

if record.storage_kind != 'absent':

2229

text_chunks[record.key] = record.get_bytes_as('chunked')

2393

chunks = record.get_bytes_as('chunked')

2394

if order_as_requested:

2395

text_chunks[record.key] = chunks

2396

else:

2397

yield ''.join(chunks), record.key[-1]

2230

2398

else:

2231

2399

raise errors.NoSuchRevision(self, record.key)

2232

for key in keys:

2233

chunks = text_chunks.pop(key)

2234

yield ''.join(chunks), key[-1]

2400

if order_as_requested:

2401

# Yield as many results as we can while preserving order.

2402

while next_key in text_chunks:

2403

chunks = text_chunks.pop(next_key)

2404

yield ''.join(chunks), next_key[-1]

2405

try:

2406

next_key = key_iter.next()

2407

except StopIteration:

2408

# We still want to fully consume the get_record_stream,

2409

# just in case it is not actually finished at this point

2410

next_key = None

2411

break

2235

2412

2236

2413

def deserialise_inventory(self, revision_id, xml):

2237

2414

"""Transform the xml into an inventory object.

2258

2435

@needs_read_lock

2259

2436

def get_inventory_xml(self, revision_id):

2260

2437

"""Get inventory XML as a file object."""

2261

texts = self._iter_inventory_xmls([revision_id])

2438

texts = self._iter_inventory_xmls([revision_id], 'unordered')

2262

2439

try:

2263

2440

text, revision_id = texts.next()

2264

2441

except StopIteration:

2496

2673

[parents_provider, other_repository._make_parents_provider()])

2497

2674

return graph.Graph(parents_provider)

2498

2675

2499

def _get_versioned_file_checker(self, text_key_references=None):

2676

def _get_versioned_file_checker(self, text_key_references=None,

2677

ancestors=None):

2500

2678

"""Return an object suitable for checking versioned files.

2501

2679

2502

2680

:param text_key_references: if non-None, an already built

2504

2682

to whether they were referred to by the inventory of the

2505

2683

revision_id that they contain. If None, this will be

2506

2684

calculated.

2685

:param ancestors: Optional result from

2686

self.get_graph().get_parent_map(self.all_revision_ids()) if already

2687

available.

2507

2688

"""

2508

2689

return _VersionedFileChecker(self,

2509

text_key_references=text_key_references)

2690

text_key_references=text_key_references, ancestors=ancestors)

2510

2691

2511

2692

def revision_ids_to_search_result(self, result_set):

2512

2693

"""Convert a set of revision ids to a graph SearchResult."""

2562

2743

return record.get_bytes_as('fulltext')

2563

2744

2564

2745

@needs_read_lock

2565

def check(self, revision_ids=None):

2746

def check(self, revision_ids=None, callback_refs=None, check_repo=True):

2566

2747

"""Check consistency of all history of given revision_ids.

2567

2748

2568

2749

Different repository implementations should override _check().

2569

2750

2570

2751

:param revision_ids: A non-empty list of revision_ids whose ancestry

2571

2752

will be checked. Typically the last revision_id of a branch.

2753

:param callback_refs: A dict of check-refs to resolve and callback

2754

the check/_check method on the items listed as wanting the ref.

2755

see bzrlib.check.

2756

:param check_repo: If False do not check the repository contents, just

2757

calculate the data callback_refs requires and call them back.

2572

2758

"""

2573

return self._check(revision_ids)

2759

return self._check(revision_ids, callback_refs=callback_refs,

2760

check_repo=check_repo)

2574

2761

2575

def _check(self, revision_ids):

2576

result = check.Check(self)

2577

result.check()

2762

def _check(self, revision_ids, callback_refs, check_repo):

2763

result = check.Check(self, check_repo=check_repo)

2764

result.check(callback_refs)

2578

2765

return result

2579

2766

2580

2767

def _warn_if_deprecated(self):

2979

3166

raise NotImplementedError(self.network_name)

2980

3167

2981

3168

def check_conversion_target(self, target_format):

2982

raise NotImplementedError(self.check_conversion_target)

3169

if self.rich_root_data and not target_format.rich_root_data:

3170

raise errors.BadConversionTarget(

3171

'Does not support rich root data.', target_format,

3172

from_format=self)

3173

if (self.supports_tree_reference and

3174

not getattr(target_format, 'supports_tree_reference', False)):

3175

raise errors.BadConversionTarget(

3176

'Does not support nested trees', target_format,

3177

from_format=self)

2983

3178

2984

3179

def open(self, a_bzrdir, _found=False):

2985

3180

"""Return an instance of this format for the bzrdir a_bzrdir.

3514

3709

# This is redundant with format.check_conversion_target(), however that

3515

3710

# raises an exception, and we just want to say "False" as in we won't

3516

3711

# support converting between these formats.

3712

if 'IDS_never' in debug.debug_flags:

3713

return False

3517

3714

if source.supports_rich_root() and not target.supports_rich_root():

3518

3715

return False

3519

3716

if (source._format.supports_tree_reference

3520

3717

and not target._format.supports_tree_reference):

3521

3718

return False

3719

if target._fallback_repositories and target._format.supports_chks:

3720

# IDS doesn't know how to copy CHKs for the parent inventories it

3721

# adds to stacked repos.

3722

return False

3723

if 'IDS_always' in debug.debug_flags:

3724

return True

3725

# Only use this code path for local source and target. IDS does far

3726

# too much IO (both bandwidth and roundtrips) over a network.

3727

if not source.bzrdir.transport.base.startswith('file:///'):

3728

return False

3729

if not target.bzrdir.transport.base.startswith('file:///'):

3730

return False

3522

3731

return True

3523

3732

3524

3733

def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):

3540

3749

deltas.sort()

3541

3750

return deltas[0][1:]

3542

3751

3543

def _get_parent_keys(self, root_key, parent_map):

3544

"""Get the parent keys for a given root id."""

3545

root_id, rev_id = root_key

3546

# Include direct parents of the revision, but only if they used

3547

# the same root_id and are heads.

3548

parent_keys = []

3549

for parent_id in parent_map[rev_id]:

3550

if parent_id == _mod_revision.NULL_REVISION:

3551

continue

3552

if parent_id not in self._revision_id_to_root_id:

3553

# We probably didn't read this revision, go spend the

3554

# extra effort to actually check

3555

try:

3556

tree = self.source.revision_tree(parent_id)

3557

except errors.NoSuchRevision:

3558

# Ghost, fill out _revision_id_to_root_id in case we

3559

# encounter this again.

3560

# But set parent_root_id to None since we don't really know

3561

parent_root_id = None

3562

else:

3563

parent_root_id = tree.get_root_id()

3564

self._revision_id_to_root_id[parent_id] = None

3565

else:

3566

parent_root_id = self._revision_id_to_root_id[parent_id]

3567

if root_id == parent_root_id:

3568

# With stacking we _might_ want to refer to a non-local

3569

# revision, but this code path only applies when we have the

3570

# full content available, so ghosts really are ghosts, not just

3571

# the edge of local data.

3572

parent_keys.append((parent_id,))

3573

else:

3574

# root_id may be in the parent anyway.

3575

try:

3576

tree = self.source.revision_tree(parent_id)

3577

except errors.NoSuchRevision:

3578

# ghost, can't refer to it.

3579

pass

3580

else:

3581

try:

3582

parent_keys.append((tree.inventory[root_id].revision,))

3583

except errors.NoSuchId:

3584

# not in the tree

3585

pass

3586

g = graph.Graph(self.source.revisions)

3587

heads = g.heads(parent_keys)

3588

selected_keys = []

3589

for key in parent_keys:

3590

if key in heads and key not in selected_keys:

3591

selected_keys.append(key)

3592

return tuple([(root_id,)+ key for key in selected_keys])

3593

3594

def _new_root_data_stream(self, root_keys_to_create, parent_map):

3595

for root_key in root_keys_to_create:

3596

parent_keys = self._get_parent_keys(root_key, parent_map)

3597

yield versionedfile.FulltextContentFactory(root_key,

3598

parent_keys, None, '')

3599

3600

3752

def _fetch_batch(self, revision_ids, basis_id, cache):

3601

3753

"""Fetch across a few revisions.

3602

3754

3648

3800

from_texts = self.source.texts

3649

3801

to_texts = self.target.texts

3650

3802

if root_keys_to_create:

3651

root_stream = self._new_root_data_stream(root_keys_to_create,

3652

parent_map)

3803

from bzrlib.fetch import _new_root_data_stream

3804

root_stream = _new_root_data_stream(

3805

root_keys_to_create, self._revision_id_to_root_id, parent_map,

3806

self.source)

3653

3807

to_texts.insert_record_stream(root_stream)

3654

3808

to_texts.insert_record_stream(from_texts.get_record_stream(

3655

3809

text_keys, self.target._format._fetch_order,

3662

3816

# for the new revisions that we are about to insert. We do this

3663

3817

# before adding the revisions so that no revision is added until

3664

3818

# all the inventories it may depend on are added.

3819

# Note that this is overzealous, as we may have fetched these in an

3820

# earlier batch.

3665

3821

parent_ids = set()

3666

3822

revision_ids = set()

3667

3823

for revision in pending_revisions:

3670

3826

parent_ids.difference_update(revision_ids)

3671

3827

parent_ids.discard(_mod_revision.NULL_REVISION)

3672

3828

parent_map = self.source.get_parent_map(parent_ids)

3673

for parent_tree in self.source.revision_trees(parent_ids):

3674

basis_id, delta = self._get_delta_for_revision(tree, parent_ids, basis_id, cache)

3829

# we iterate over parent_map and not parent_ids because we don't

3830

# want to try copying any revision which is a ghost

3831

for parent_tree in self.source.revision_trees(parent_map):

3675

3832

current_revision_id = parent_tree.get_revision_id()

3676

3833

parents_parents = parent_map[current_revision_id]

3834

basis_id, delta = self._get_delta_for_revision(parent_tree,

3835

parents_parents, basis_id, cache)

3677

3836

self.target.add_inventory_by_delta(

3678

3837

basis_id, delta, current_revision_id, parents_parents)

3679

3838

# insert signatures and revisions

3693

3852

3694

3853

:param revision_ids: The list of revisions to fetch. Must be in

3695

3854

topological order.

3696

:param pb: A ProgressBar

3855

:param pb: A ProgressTask

3697

3856

:return: None

3698

3857

"""

3699

3858

basis_id, basis_tree = self._get_basis(revision_ids[0])

3744

3903

# Walk through all revisions; get inventory deltas, copy referenced

3745

3904

# texts that delta references, insert the delta, revision and

3746

3905

# signature.

3747

first_rev = self.source.get_revision(revision_ids[0])

3748

3906

if pb is None:

3749

3907

my_pb = ui.ui_factory.nested_progress_bar()

3750

3908

pb = my_pb

3873

4031

3874

4032

class _VersionedFileChecker(object):

3875

4033

3876

def __init__(self, repository, text_key_references=None):

4034

def __init__(self, repository, text_key_references=None, ancestors=None):

3877

4035

self.repository = repository

3878

4036

self.text_index = self.repository._generate_text_key_index(

3879

text_key_references=text_key_references)

4037

text_key_references=text_key_references, ancestors=ancestors)

3880

4038

3881

4039

def calculate_file_version_parents(self, text_key):

3882

4040

"""Calculate the correct parents for a file version according to

3900

4058

revision_id) tuples for versions that are present in this versioned

3901

4059

file, but not used by the corresponding inventory.

3902

4060

"""

4061

local_progress = None

4062

if progress_bar is None:

4063

local_progress = ui.ui_factory.nested_progress_bar()

4064

progress_bar = local_progress

4065

try:

4066

return self._check_file_version_parents(texts, progress_bar)

4067

finally:

4068

if local_progress:

4069

local_progress.finished()

4070

4071

def _check_file_version_parents(self, texts, progress_bar):

4072

"""See check_file_version_parents."""

3903

4073

wrong_parents = {}

3904

4074

self.file_ids = set([file_id for file_id, _ in

3905

4075

self.text_index.iterkeys()])

3906

4076

# text keys is now grouped by file_id

3907

n_weaves = len(self.file_ids)

3908

files_in_revisions = {}

3909

revisions_of_files = {}

3910

4077

n_versions = len(self.text_index)

3911

4078

progress_bar.update('loading text store', 0, n_versions)

3912

4079

parent_map = self.repository.texts.get_parent_map(self.text_index)

3914

4081

text_keys = self.repository.texts.keys()

3915

4082

unused_keys = frozenset(text_keys) - set(self.text_index)

3916

4083

for num, key in enumerate(self.text_index.iterkeys()):

3917

if progress_bar is not None:

3918

progress_bar.update('checking text graph', num, n_versions)

4084

progress_bar.update('checking text graph', num, n_versions)

3919

4085

correct_parents = self.calculate_file_version_parents(key)

3920

4086

try:

3921

4087

knit_parents = parent_map[key]

4006

4172

else:

4007

4173

new_pack.set_write_cache_size(1024*1024)

4008

4174

for substream_type, substream in stream:

4175

if 'stream' in debug.debug_flags:

4176

mutter('inserting substream: %s', substream_type)

4009

4177

if substream_type == 'texts':

4010

4178

self.target_repo.texts.insert_record_stream(substream)

4011

4179

elif substream_type == 'inventories':

4015

4183

else:

4016

4184

self._extract_and_insert_inventories(

4017

4185

substream, src_serializer)

4186

elif substream_type == 'inventory-deltas':

4187

self._extract_and_insert_inventory_deltas(

4188

substream, src_serializer)

4018

4189

elif substream_type == 'chk_bytes':

4019

4190

# XXX: This doesn't support conversions, as it assumes the

4020

4191

# conversion was done in the fetch code.

4071

4242

self.target_repo.pack(hint=hint)

4072

4243

return [], set()

4073

4244

4074

def _extract_and_insert_inventories(self, substream, serializer):

4245

def _extract_and_insert_inventory_deltas(self, substream, serializer):

4246

target_rich_root = self.target_repo._format.rich_root_data

4247

target_tree_refs = self.target_repo._format.supports_tree_reference

4248

for record in substream:

4249

# Insert the delta directly

4250

inventory_delta_bytes = record.get_bytes_as('fulltext')

4251

deserialiser = inventory_delta.InventoryDeltaDeserializer()

4252

try:

4253

parse_result = deserialiser.parse_text_bytes(

4254

inventory_delta_bytes)

4255

except inventory_delta.IncompatibleInventoryDelta, err:

4256

trace.mutter("Incompatible delta: %s", err.msg)

4257

raise errors.IncompatibleRevision(self.target_repo._format)

4258

basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result

4259

revision_id = new_id

4260

parents = [key[0] for key in record.parents]

4261

self.target_repo.add_inventory_by_delta(

4262

basis_id, inv_delta, revision_id, parents)

4263

4264

def _extract_and_insert_inventories(self, substream, serializer,

4265

parse_delta=None):

4075

4266

"""Generate a new inventory versionedfile in target, converting data.

4076

4267

4077

4268

The inventory is retrieved from the source, (deserializing it), and

4078

4269

stored in the target (reserializing it in a different format).

4079

4270

"""

4271

target_rich_root = self.target_repo._format.rich_root_data

4272

target_tree_refs = self.target_repo._format.supports_tree_reference

4080

4273

for record in substream:

4274

# It's not a delta, so it must be a fulltext in the source

4275

# serializer's format.

4081

4276

bytes = record.get_bytes_as('fulltext')

4082

4277

revision_id = record.key[0]

4083

4278

inv = serializer.read_inventory_from_string(bytes, revision_id)

4084

4279

parents = [key[0] for key in record.parents]

4085

4280

self.target_repo.add_inventory(revision_id, inv, parents)

4281

# No need to keep holding this full inv in memory when the rest of

4282

# the substream is likely to be all deltas.

4283

del inv

4086

4284

4087

4285

def _extract_and_insert_revisions(self, substream, serializer):

4088

4286

for record in substream:

4137

4335

return [('signatures', signatures), ('revisions', revisions)]

4138

4336

4139

4337

def _generate_root_texts(self, revs):

4140

"""This will be called by __fetch between fetching weave texts and

4338

"""This will be called by get_stream between fetching weave texts and

4141

4339

fetching the inventory weave.

4142

4143

Subclasses should override this if they need to generate root texts

4144

after fetching weave texts.

4145

4340

"""

4146

4341

if self._rich_root_upgrade():

4147

4342

import bzrlib.fetch

4179

4374

# will be valid.

4180

4375

for _ in self._generate_root_texts(revs):

4181

4376

yield _

4182

# NB: This currently reopens the inventory weave in source;

4183

# using a single stream interface instead would avoid this.

4184

from_weave = self.from_repository.inventories

4185

4377

# we fetch only the referenced inventories because we do not

4186

4378

# know for unselected inventories whether all their required

4187

4379

# texts are present in the other repository - it could be

4226

4418

if not keys:

4227

4419

# No need to stream something we don't have

4228

4420

continue

4421

if substream_kind == 'inventories':

4422

# Some missing keys are genuinely ghosts, filter those out.

4423

present = self.from_repository.inventories.get_parent_map(keys)

4424

revs = [key[0] for key in present]

4425

# Get the inventory stream more-or-less as we do for the

4426

# original stream; there's no reason to assume that records

4427

# direct from the source will be suitable for the sink. (Think

4428

# e.g. 2a -> 1.9-rich-root).

4429

for info in self._get_inventory_stream(revs, missing=True):

4430

yield info

4431

continue

4432

4229

4433

# Ask for full texts always so that we don't need more round trips

4230

4434

# after this stream.

4231

4435

# Some of the missing keys are genuinely ghosts, so filter absent

4246

4450

return (not self.from_repository._format.rich_root_data and

4247

4451

self.to_format.rich_root_data)

4248

4452

4249

def _get_inventory_stream(self, revision_ids):

4453

def _get_inventory_stream(self, revision_ids, missing=False):

4250

4454

from_format = self.from_repository._format

4251

if (from_format.supports_chks and self.to_format.supports_chks

4252

and (from_format._serializer == self.to_format._serializer)):

4253

# Both sides support chks, and they use the same serializer, so it

4254

# is safe to transmit the chk pages and inventory pages across

4255

# as-is.

4256

return self._get_chk_inventory_stream(revision_ids)

4257

elif (not from_format.supports_chks):

4258

# Source repository doesn't support chks. So we can transmit the

4259

# inventories 'as-is' and either they are just accepted on the

4260

# target, or the Sink will properly convert it.

4261

return self._get_simple_inventory_stream(revision_ids)

4455

if (from_format.supports_chks and self.to_format.supports_chks and

4456

from_format.network_name() == self.to_format.network_name()):

4457

raise AssertionError(

4458

"this case should be handled by GroupCHKStreamSource")

4459

elif 'forceinvdeltas' in debug.debug_flags:

4460

return self._get_convertable_inventory_stream(revision_ids,

4461

delta_versus_null=missing)

4462

elif from_format.network_name() == self.to_format.network_name():

4463

# Same format.

4464

return self._get_simple_inventory_stream(revision_ids,

4465

missing=missing)

4466

elif (not from_format.supports_chks and not self.to_format.supports_chks

4467

and from_format._serializer == self.to_format._serializer):

4468

# Essentially the same format.

4469

return self._get_simple_inventory_stream(revision_ids,

4470

missing=missing)

4262

4471

else:

4263

# XXX: Hack to make not-chk->chk fetch: copy the inventories as

4264

# inventories. Note that this should probably be done somehow

4265

# as part of bzrlib.repository.StreamSink. Except JAM couldn't

4266

# figure out how a non-chk repository could possibly handle

4267

# deserializing an inventory stream from a chk repo, as it

4268

# doesn't have a way to understand individual pages.

4269

return self._get_convertable_inventory_stream(revision_ids)

4472

# Any time we switch serializations, we want to use an

4473

# inventory-delta based approach.

4474

return self._get_convertable_inventory_stream(revision_ids,

4475

delta_versus_null=missing)

4270

4476

4271

def _get_simple_inventory_stream(self, revision_ids):

4477

def _get_simple_inventory_stream(self, revision_ids, missing=False):

4478

# NB: This currently reopens the inventory weave in source;

4479

# using a single stream interface instead would avoid this.

4272

4480

from_weave = self.from_repository.inventories

4481

if missing:

4482

delta_closure = True

4483

else:

4484

delta_closure = not self.delta_on_metadata()

4273

4485

yield ('inventories', from_weave.get_record_stream(

4274

4486

[(rev_id,) for rev_id in revision_ids],

4275

self.inventory_fetch_order(),

4276

not self.delta_on_metadata()))

4277

4278

def _get_chk_inventory_stream(self, revision_ids):

4279

"""Fetch the inventory texts, along with the associated chk maps."""

4280

# We want an inventory outside of the search set, so that we can filter

4281

# out uninteresting chk pages. For now we use

4282

# _find_revision_outside_set, but if we had a Search with cut_revs, we

4283

# could use that instead.

4284

start_rev_id = self.from_repository._find_revision_outside_set(

4285

revision_ids)

4286

start_rev_key = (start_rev_id,)

4287

inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]

4288

if start_rev_id != _mod_revision.NULL_REVISION:

4289

inv_keys_to_fetch.append((start_rev_id,))

4290

# Any repo that supports chk_bytes must also support out-of-order

4291

# insertion. At least, that is how we expect it to work

4292

# We use get_record_stream instead of iter_inventories because we want

4293

# to be able to insert the stream as well. We could instead fetch

4294

# allowing deltas, and then iter_inventories, but we don't know whether

4295

# source or target is more 'local' anway.

4296

inv_stream = self.from_repository.inventories.get_record_stream(

4297

inv_keys_to_fetch, 'unordered',

4298

True) # We need them as full-texts so we can find their references

4299

uninteresting_chk_roots = set()

4300

interesting_chk_roots = set()

4301

def filter_inv_stream(inv_stream):

4302

for idx, record in enumerate(inv_stream):

4303

### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))

4304

bytes = record.get_bytes_as('fulltext')

4305

chk_inv = inventory.CHKInventory.deserialise(

4306

self.from_repository.chk_bytes, bytes, record.key)

4307

if record.key == start_rev_key:

4308

uninteresting_chk_roots.add(chk_inv.id_to_entry.key())

4309

p_id_map = chk_inv.parent_id_basename_to_file_id

4310

if p_id_map is not None:

4311

uninteresting_chk_roots.add(p_id_map.key())

4312

else:

4313

yield record

4314

interesting_chk_roots.add(chk_inv.id_to_entry.key())

4315

p_id_map = chk_inv.parent_id_basename_to_file_id

4316

if p_id_map is not None:

4317

interesting_chk_roots.add(p_id_map.key())

4318

### pb.update('fetch inventory', 0, 2)

4319

yield ('inventories', filter_inv_stream(inv_stream))

4320

# Now that we have worked out all of the interesting root nodes, grab

4321

# all of the interesting pages and insert them

4322

### pb.update('fetch inventory', 1, 2)

4323

interesting = chk_map.iter_interesting_nodes(

4324

self.from_repository.chk_bytes, interesting_chk_roots,

4325

uninteresting_chk_roots)

4326

def to_stream_adapter():

4327

"""Adapt the iter_interesting_nodes result to a single stream.

4328

4329

iter_interesting_nodes returns records as it processes them, along

4330

with keys. However, we only want to return the records themselves.

4331

"""

4332

for record, items in interesting:

4333

if record is not None:

4334

yield record

4335

# XXX: We could instead call get_record_stream(records.keys())

4336

# ATM, this will always insert the records as fulltexts, and

4337

# requires that you can hang on to records once you have gone

4338

# on to the next one. Further, it causes the target to

4339

# recompress the data. Testing shows it to be faster than

4340

# requesting the records again, though.

4341

yield ('chk_bytes', to_stream_adapter())

4342

### pb.update('fetch inventory', 2, 2)

4343

4344

def _get_convertable_inventory_stream(self, revision_ids):

4345

# XXX: One of source or target is using chks, and they don't have

4346

# compatible serializations. The StreamSink code expects to be

4347

# able to convert on the target, so we need to put

4348

# bytes-on-the-wire that can be converted

4349

yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))

4350

4351

def _stream_invs_as_fulltexts(self, revision_ids):

4487

self.inventory_fetch_order(), delta_closure))

4488

4489

def _get_convertable_inventory_stream(self, revision_ids,

4490

delta_versus_null=False):

4491

# The source is using CHKs, but the target either doesn't or it has a

4492

# different serializer. The StreamSink code expects to be able to

4493

# convert on the target, so we need to put bytes-on-the-wire that can

4494

# be converted. That means inventory deltas (if the remote is <1.19,

4495

# RemoteStreamSink will fallback to VFS to insert the deltas).

4496

yield ('inventory-deltas',

4497

self._stream_invs_as_deltas(revision_ids,

4498

delta_versus_null=delta_versus_null))

4499

4500

def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):

4501

"""Return a stream of inventory-deltas for the given rev ids.

4502

4503

:param revision_ids: The list of inventories to transmit

4504

:param delta_versus_null: Don't try to find a minimal delta for this

4505

entry, instead compute the delta versus the NULL_REVISION. This

4506

effectively streams a complete inventory. Used for stuff like

4507

filling in missing parents, etc.

4508

"""

4352

4509

from_repo = self.from_repository

4353

from_serializer = from_repo._format._serializer

4354

4510

revision_keys = [(rev_id,) for rev_id in revision_ids]

4355

4511

parent_map = from_repo.inventories.get_parent_map(revision_keys)

4356

for inv in self.from_repository.iter_inventories(revision_ids):

4357

# XXX: This is a bit hackish, but it works. Basically,

4358

# CHKSerializer 'accidentally' supports

4359

# read/write_inventory_to_string, even though that is never

4360

# the format that is stored on disk. It *does* give us a

4361

# single string representation for an inventory, so live with

4362

# it for now.

4363

# This would be far better if we had a 'serialized inventory

4364

# delta' form. Then we could use 'inventory._make_delta', and

4365

# transmit that. This would both be faster to generate, and

4366

# result in fewer bytes-on-the-wire.

4367

as_bytes = from_serializer.write_inventory_to_string(inv)

4512

# XXX: possibly repos could implement a more efficient iter_inv_deltas

4513

# method...

4514

inventories = self.from_repository.iter_inventories(

4515

revision_ids, 'topological')

4516

format = from_repo._format

4517

invs_sent_so_far = set([_mod_revision.NULL_REVISION])

4518

inventory_cache = lru_cache.LRUCache(50)

4519

null_inventory = from_repo.revision_tree(

4520

_mod_revision.NULL_REVISION).inventory

4521

# XXX: ideally the rich-root/tree-refs flags would be per-revision, not

4522

# per-repo (e.g. streaming a non-rich-root revision out of a rich-root

4523

# repo back into a non-rich-root repo ought to be allowed)

4524

serializer = inventory_delta.InventoryDeltaSerializer(

4525

versioned_root=format.rich_root_data,

4526

tree_references=format.supports_tree_reference)

4527

for inv in inventories:

4368

4528

key = (inv.revision_id,)

4369

4529

parent_keys = parent_map.get(key, ())

4530

delta = None

4531

if not delta_versus_null and parent_keys:

4532

# The caller did not ask for complete inventories and we have

4533

# some parents that we can delta against. Make a delta against

4534

# each parent so that we can find the smallest.

4535

parent_ids = [parent_key[0] for parent_key in parent_keys]

4536

for parent_id in parent_ids:

4537

if parent_id not in invs_sent_so_far:

4538

# We don't know that the remote side has this basis, so

4539

# we can't use it.

4540

continue

4541

if parent_id == _mod_revision.NULL_REVISION:

4542

parent_inv = null_inventory

4543

else:

4544

parent_inv = inventory_cache.get(parent_id, None)

4545

if parent_inv is None:

4546

parent_inv = from_repo.get_inventory(parent_id)

4547

candidate_delta = inv._make_delta(parent_inv)

4548

if (delta is None or

4549

len(delta) > len(candidate_delta)):

4550

delta = candidate_delta

4551

basis_id = parent_id

4552

if delta is None:

4553

# Either none of the parents ended up being suitable, or we

4554

# were asked to delta against NULL

4555

basis_id = _mod_revision.NULL_REVISION

4556

delta = inv._make_delta(null_inventory)

4557

invs_sent_so_far.add(inv.revision_id)

4558

inventory_cache[inv.revision_id] = inv

4559

delta_serialized = ''.join(

4560

serializer.delta_to_lines(basis_id, key[-1], delta))

4370

4561

yield versionedfile.FulltextContentFactory(

4371

key, parent_keys, None, as_bytes)

4562

key, parent_keys, None, delta_serialized)

4372

4563

4373

4564

4374

4565

def _iter_for_revno(repo, partial_history_cache, stop_index=None,

Older »