/brz/remove-bazaar : revision 2933

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-10-24 06:48:13 UTC
mfrom: (2592.3.241 mbp-packrepo-as-knits)
Revision ID: pqm@pqm.ubuntu.com-20071024064813-wjcmv8ofabf6kdrb

Pack repositories!

files added:
bzrlib/repofmt/pack_repo.py

doc/developers/knitpack.txt

files modified:
bzr

bzrlib/bzrdir.py

bzrlib/delta.py

bzrlib/graph.py

bzrlib/index.py

bzrlib/knit.py

bzrlib/reconcile.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_repository.py

doc/developers/index.txt

doc/developers/repository.txt

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

494

495

returns the sha1 of the serialized inventory.

496

"""

497

assert self.is_in_write_group()

497

498

_mod_revision.check_not_reserved_id(revision_id)

498

499

assert inv.revision_id is None or inv.revision_id == revision_id, \

499

500

"Mismatch between inventory revision" \

758

759

raise NotImplementedError(self.get_data_stream)

759

760

761

def insert_data_stream(self, stream):

762

"""XXX What does this really do?

763

764

Is it a substitute for fetch?

765

Should it manage its own write group ?

766

"""

761

767

for item_key, bytes in stream:

762

768

if item_key[0] == 'file':

763

769

(file_id,) = item_key[1:]

1043

1049

signature,

1044

1050

self.get_transaction())

1045

1051

1046

def fileids_altered_by_revision_ids(self, revision_ids):

1047

"""Find the file ids and versions affected by revisions.

1048

1049

:param revisions: an iterable containing revision ids.

1052

def _find_file_ids_from_xml_inventory_lines(self, line_iterator,

1053

revision_ids):

1054

"""Helper routine for fileids_altered_by_revision_ids.

1055

1056

This performs the translation of xml lines to revision ids.

1057

1058

:param line_iterator: An iterator of lines

1059

:param revision_ids: The revision ids to filter for.

1050

1060

:return: a dictionary mapping altered file-ids to an iterable of

1051

1061

revision_ids. Each altered file-ids has the exact revision_ids that

1052

1062

altered it listed explicitly.

1053

1063

"""

1054

assert self._serializer.support_altered_by_hack, \

1055

("fileids_altered_by_revision_ids only supported for branches "

1056

"which store inventory as unnested xml, not on %r" % self)

1057

selected_revision_ids = set(revision_ids)

1058

w = self.get_inventory_weave()

1059

1064

result = {}

1060

1065

1061

1066

# this code needs to read every new line in every inventory for the

1077

1082

search = self._file_ids_altered_regex.search

1078

1083

unescape = _unescape_xml

1079

1084

setdefault = result.setdefault

1085

for line in line_iterator:

1086

match = search(line)

1087

if match is None:

1088

continue

1089

# One call to match.group() returning multiple items is quite a

1090

# bit faster than 2 calls to match.group() each returning 1

1091

file_id, revision_id = match.group('file_id', 'revision_id')

1092

1093

# Inlining the cache lookups helps a lot when you make 170,000

1094

# lines and 350k ids, versus 8.4 unique ids.

1095

# Using a cache helps in 2 ways:

1096

# 1) Avoids unnecessary decoding calls

1097

# 2) Re-uses cached strings, which helps in future set and

1098

# equality checks.

1099

# (2) is enough that removing encoding entirely along with

1100

# the cache (so we are using plain strings) results in no

1101

# performance improvement.

1102

try:

1103

revision_id = unescape_revid_cache[revision_id]

1104

except KeyError:

1105

unescaped = unescape(revision_id)

1106

unescape_revid_cache[revision_id] = unescaped

1107

revision_id = unescaped

1108

1109

if revision_id in revision_ids:

1110

try:

1111

file_id = unescape_fileid_cache[file_id]

1112

except KeyError:

1113

unescaped = unescape(file_id)

1114

unescape_fileid_cache[file_id] = unescaped

1115

file_id = unescaped

1116

setdefault(file_id, set()).add(revision_id)

1117

return result

1118

1119

def fileids_altered_by_revision_ids(self, revision_ids):

1120

"""Find the file ids and versions affected by revisions.

1121

1122

:param revisions: an iterable containing revision ids.

1123

:return: a dictionary mapping altered file-ids to an iterable of

1124

revision_ids. Each altered file-ids has the exact revision_ids that

1125

altered it listed explicitly.

1126

"""

1127

assert self._serializer.support_altered_by_hack, \

1128

("fileids_altered_by_revision_ids only supported for branches "

1129

"which store inventory as unnested xml, not on %r" % self)

1130

selected_revision_ids = set(revision_ids)

1131

w = self.get_inventory_weave()

1080

1132

pb = ui.ui_factory.nested_progress_bar()

1081

1133

try:

1082

for line in w.iter_lines_added_or_present_in_versions(

1083

selected_revision_ids, pb=pb):

1084

match = search(line)

1085

if match is None:

1086

continue

1087

# One call to match.group() returning multiple items is quite a

1088

# bit faster than 2 calls to match.group() each returning 1

1089

file_id, revision_id = match.group('file_id', 'revision_id')

1090

1091

# Inlining the cache lookups helps a lot when you make 170,000

1092

# lines and 350k ids, versus 8.4 unique ids.

1093

# Using a cache helps in 2 ways:

1094

# 1) Avoids unnecessary decoding calls

1095

# 2) Re-uses cached strings, which helps in future set and

1096

# equality checks.

1097

# (2) is enough that removing encoding entirely along with

1098

# the cache (so we are using plain strings) results in no

1099

# performance improvement.

1100

try:

1101

revision_id = unescape_revid_cache[revision_id]

1102

except KeyError:

1103

unescaped = unescape(revision_id)

1104

unescape_revid_cache[revision_id] = unescaped

1105

revision_id = unescaped

1106

1107

if revision_id in selected_revision_ids:

1108

try:

1109

file_id = unescape_fileid_cache[file_id]

1110

except KeyError:

1111

unescaped = unescape(file_id)

1112

unescape_fileid_cache[file_id] = unescaped

1113

file_id = unescaped

1114

setdefault(file_id, set()).add(revision_id)

1134

return self._find_file_ids_from_xml_inventory_lines(

1135

w.iter_lines_added_or_present_in_versions(

1136

selected_revision_ids, pb=pb),

1137

selected_revision_ids)

1115

1138

finally:

1116

1139

pb.finished()

1117

return result

1118

1140

1119

1141

def iter_files_bytes(self, desired_files):

1120

1142

"""Iterate through file versions.

1595

1617

1596

1618

def install_revision(repository, rev, revision_tree):

1597

1619

"""Install all revision data into a repository."""

1620

repository.start_write_group()

1621

try:

1622

_install_revision(repository, rev, revision_tree)

1623

except:

1624

repository.abort_write_group()

1625

raise

1626

else:

1627

repository.commit_write_group()

1628

1629

1630

def _install_revision(repository, rev, revision_tree):

1631

"""Install all revision data into a repository."""

1598

1632

present_parents = []

1599

1633

parent_trees = {}

1600

1634

for p_id in rev.parent_ids:

1934

1968

'RepositoryFormatKnit3',

1935

1969

)

1936

1970

1971

# Experimental formats. These make no guarantee about data stability.

1972

# There is one format for pre-subtrees, and one for post-subtrees to

1973

# allow ease of testing.

1974

format_registry.register_lazy(

1975

'Bazaar Experimental no-subtrees\n',

1976

'bzrlib.repofmt.pack_repo',

1977

'RepositoryFormatKnitPack1',

1978

)

1979

format_registry.register_lazy(

1980

'Bazaar Experimental subtrees\n',

1981

'bzrlib.repofmt.pack_repo',

1982

'RepositoryFormatKnitPack3',

1983

)

1984

1937

1985

1938

1986

class InterRepository(InterObject):

1939

1987

"""This class represents operations taking place between two repositories.

2244

2292

return self.source._eliminate_revisions_not_present(required_topo_revisions)

2245

2293

2246

2294

2295

class InterPackRepo(InterSameDataRepository):

2296

"""Optimised code paths between Pack based repositories."""

2297

2298

@classmethod

2299

def _get_repo_format_to_test(self):

2300

from bzrlib.repofmt import pack_repo

2301

return pack_repo.RepositoryFormatKnitPack1()

2302

2303

@staticmethod

2304

def is_compatible(source, target):

2305

"""Be compatible with known Pack formats.

2306

2307

We don't test for the stores being of specific types because that

2308

could lead to confusing results, and there is no need to be

2309

overly general.

2310

"""

2311

from bzrlib.repofmt.pack_repo import RepositoryFormatPack

2312

try:

2313

are_packs = (isinstance(source._format, RepositoryFormatPack) and

2314

isinstance(target._format, RepositoryFormatPack))

2315

except AttributeError:

2316

return False

2317

return are_packs and InterRepository._same_model(source, target)

2318

2319

@needs_write_lock

2320

def fetch(self, revision_id=None, pb=None):

2321

"""See InterRepository.fetch()."""

2322

mutter("Using fetch logic to copy between %s(%s) and %s(%s)",

2323

self.source, self.source._format, self.target, self.target._format)

2324

self.count_copied = 0

2325

if revision_id is None:

2326

# TODO:

2327

# everything to do - use pack logic

2328

# to fetch from all packs to one without

2329

# inventory parsing etc, IFF nothing to be copied is in the target.

2330

# till then:

2331

revision_ids = self.source.all_revision_ids()

2332

# implementing the TODO will involve:

2333

# - detecting when all of a pack is selected

2334

# - avoiding as much as possible pre-selection, so the

2335

# more-core routines such as create_pack_from_packs can filter in

2336

# a just-in-time fashion. (though having a HEADS list on a

2337

# repository might make this a lot easier, because we could

2338

# sensibly detect 'new revisions' without doing a full index scan.

2339

elif _mod_revision.is_null(revision_id):

2340

# nothing to do:

2341

return

2342

else:

2343

try:

2344

revision_ids = self.missing_revision_ids(revision_id)

2345

except errors.NoSuchRevision:

2346

raise errors.InstallFailed([revision_id])

2347

packs = self.source._pack_collection.all_packs()

2348

pack = self.target._pack_collection.create_pack_from_packs(

2349

packs, '.fetch', revision_ids,

2350

)

2351

if pack is not None:

2352

self.target._pack_collection._save_pack_names()

2353

# Trigger an autopack. This may duplicate effort as we've just done

2354

# a pack creation, but for now it is simpler to think about as

2355

# 'upload data, then repack if needed'.

2356

self.target._pack_collection.autopack()

2357

return pack.get_revision_count()

2358

else:

2359

return 0

2360

2361

@needs_read_lock

2362

def missing_revision_ids(self, revision_id=None):

2363

"""See InterRepository.missing_revision_ids()."""

2364

if revision_id is not None:

2365

source_ids = self.source.get_ancestry(revision_id)

2366

assert source_ids[0] is None

2367

source_ids.pop(0)

2368

else:

2369

source_ids = self.source.all_revision_ids()

2370

# source_ids is the worst possible case we may need to pull.

2371

# now we want to filter source_ids against what we actually

2372

# have in target, but don't try to check for existence where we know

2373

# we do not have a revision as that would be pointless.

2374

target_ids = set(self.target.all_revision_ids())

2375

return [r for r in source_ids if (r not in target_ids)]

2376

2377

2247

2378

class InterModel1and2(InterRepository):

2248

2379

2249

2380

@classmethod

2299

2430

"""Be compatible with Knit1 source and Knit3 target"""

2300

2431

from bzrlib.repofmt.knitrepo import RepositoryFormatKnit3

2301

2432

try:

2302

from bzrlib.repofmt.knitrepo import RepositoryFormatKnit1, \

2303

RepositoryFormatKnit3

2304

return (isinstance(source._format, (RepositoryFormatKnit1)) and

2305

isinstance(target._format, (RepositoryFormatKnit3)))

2433

from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,

2434

RepositoryFormatKnit3)

2435

from bzrlib.repofmt.pack_repo import (RepositoryFormatKnitPack1,

2436

RepositoryFormatKnitPack3)

2437

return (isinstance(source._format,

2438

(RepositoryFormatKnit1, RepositoryFormatKnitPack1)) and

2439

isinstance(target._format,

2440

(RepositoryFormatKnit3, RepositoryFormatKnitPack3))

2441

)

2306

2442

except AttributeError:

2307

2443

return False

2308

2444

2394

2530

InterRepository.register_optimiser(InterKnitRepo)

2395

2531

InterRepository.register_optimiser(InterModel1and2)

2396

2532

InterRepository.register_optimiser(InterKnit1and2)

2533

InterRepository.register_optimiser(InterPackRepo)

2397

2534

InterRepository.register_optimiser(InterRemoteToOther)

2398

2535

InterRepository.register_optimiser(InterOtherToRemote)

2399

2536

2563

2700

for parent in parents_from_inventories:

2564

2701

if parent in heads and parent not in new_parents:

2565

2702

new_parents.append(parent)

2566

return new_parents

2703

return tuple(new_parents)

2567

2704

2568

2705

def check_file_version_parents(self, weave, file_id):

2569

2706

result = {}

2574

2711

continue

2575

2712

text_revision = self.revision_versions.get_text_version(

2576

2713

file_id, revision_id)

2577

knit_parents = weave.get_parents(text_revision)

2714

knit_parents = tuple(weave.get_parents(text_revision))

2578

2715

if correct_parents != knit_parents:

2579

2716

result[revision_id] = (knit_parents, correct_parents)

2580

2717

return result

Older »