/brz/remove-bazaar : revision 2692.1.24

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Andrew Bennetts
Date: 2008-03-27 06:10:18 UTC
mfrom: (3309 +trunk)
mto: This revision was merged to the branch mainline in revision 3320.
Revision ID: andrew.bennetts@canonical.com-20080327061018-dxztpxyv6yoeg3am

Merge from bzr.dev.

files added:
bzrlib/directory_service.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

doc/developers/development-repo.txt

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/user-guide/revnos.txt

tools/package_mf.py

files removed:
bzrlib/plugins/multiparent.py

index.txt

files renamed:
bzrlib/plugins/launchpad/lp_indirect.py => bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_lp_indirect.py => bzrlib/plugins/launchpad/test_lp_directory.py

files modified:
.bzrignore

NEWS

README

bzrlib/__init__.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/help_topics/__init__.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/mail_client.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/multiparent.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/status.py

bzrlib/symbol_versioning.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui/__init__.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/HACKING.txt

doc/developers/lca-merge.txt

doc/en/mini-tutorial/index.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/index.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/version_info.txt

setup.py

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

lazy_import(globals(), """

from bzrlib import (

annotate,

graph as _mod_graph,

lru_cache,

pack,

trace,

134

135

class KnitContent(object):

135

136

"""Content of a knit version to which deltas can be applied."""

136

137

138

def __init__(self):

139

self._should_strip_eol = False

140

137

141

def annotate(self):

138

142

"""Return a list of (origin, text) tuples."""

139

143

return list(self.annotate_iter())

142

146

"""Apply delta to this object to become new_version_id."""

143

147

raise NotImplementedError(self.apply_delta)

144

148

149

def cleanup_eol(self, copy_on_mutate=True):

150

if self._should_strip_eol:

151

if copy_on_mutate:

152

self._lines = self._lines[:]

153

self.strip_last_line_newline()

154

145

155

def line_delta_iter(self, new_lines):

146

156

"""Generate line-based delta from this content to new_lines."""

147

157

new_texts = new_lines.text()

187

197

"""Annotated content."""

188

198

189

199

def __init__(self, lines):

200

KnitContent.__init__(self)

190

201

self._lines = lines

191

202

192

203

def annotate_iter(self):

204

215

def strip_last_line_newline(self):

205

216

line = self._lines[-1][1].rstrip('\n')

206

217

self._lines[-1] = (self._lines[-1][0], line)

218

self._should_strip_eol = False

207

219

208

220

def text(self):

209

221

try:

210

return [text for origin, text in self._lines]

222

lines = [text for origin, text in self._lines]

211

223

except ValueError, e:

212

224

# most commonly (only?) caused by the internal form of the knit

213

225

# missing annotation information because of a bug - see thread

216

228

"line in annotated knit missing annotation information: %s"

217

229

% (e,))

218

230

231

if self._should_strip_eol:

232

anno, line = lines[-1]

233

lines[-1] = (anno, line.rstrip('\n'))

234

return lines

235

219

236

def copy(self):

220

237

return AnnotatedKnitContent(self._lines[:])

221

238

229

246

"""

230

247

231

248

def __init__(self, lines, version_id):

249

KnitContent.__init__(self)

232

250

self._lines = lines

233

251

self._version_id = version_id

234

252

251

269

252

270

def strip_last_line_newline(self):

253

271

self._lines[-1] = self._lines[-1].rstrip('\n')

272

self._should_strip_eol = False

254

273

255

274

def text(self):

256

return self._lines

257

258

259

class KnitAnnotateFactory(object):

275

lines = self._lines

276

if self._should_strip_eol:

277

lines = lines[:]

278

lines[-1] = lines[-1].rstrip('\n')

279

return lines

280

281

282

class _KnitFactory(object):

283

"""Base class for common Factory functions."""

284

285

def parse_record(self, version_id, record, record_details,

286

base_content, copy_base_content=True):

287

"""Parse a record into a full content object.

288

289

:param version_id: The official version id for this content

290

:param record: The data returned by read_records_iter()

291

:param record_details: Details about the record returned by

292

get_build_details

293

:param base_content: If get_build_details returns a compression_parent,

294

you must return a base_content here, else use None

295

:param copy_base_content: When building from the base_content, decide

296

you can either copy it and return a new object, or modify it in

297

place.

298

:return: (content, delta) A Content object and possibly a line-delta,

299

delta may be None

300

"""

301

method, noeol = record_details

302

if method == 'line-delta':

303

assert base_content is not None

304

if copy_base_content:

305

content = base_content.copy()

306

else:

307

content = base_content

308

delta = self.parse_line_delta(record, version_id)

309

content.apply_delta(delta, version_id)

310

else:

311

content = self.parse_fulltext(record, version_id)

312

delta = None

313

content._should_strip_eol = noeol

314

return (content, delta)

315

316

317

class KnitAnnotateFactory(_KnitFactory):

260

318

"""Factory for creating annotated Content objects."""

261

319

262

320

annotated = True

368

426

return content.annotate_iter()

369

427

370

428

371

class KnitPlainFactory(object):

429

class KnitPlainFactory(_KnitFactory):

372

430

"""Factory for creating plain Content objects."""

373

431

374

432

annotated = False

426

484

return out

427

485

428

486

def annotate_iter(self, knit, version_id):

429

return annotate_knit(knit, version_id)

487

annotator = _KnitAnnotator(knit)

488

return iter(annotator.annotate(version_id))

430

489

431

490

432

491

def make_empty_knit(transport, relpath):

516

575

fulltext_size = size

517

576

break

518

577

delta_size += size

519

delta_parents = self._index.get_parents(parent)

578

delta_parents = self._index.get_parent_map([parent])[parent]

520

579

else:

521

580

# We couldn't find a fulltext, so we must create a new one

522

581

return False

661

720

def get_delta(self, version_id):

662

721

"""Get a delta for constructing version from some other version."""

663

722

self.check_not_reserved_id(version_id)

664

parents = self.get_parents(version_id)

723

parents = self.get_parent_map([version_id])[version_id]

665

724

if len(parents):

666

725

parent = parents[0]

667

726

else:

748

807

if self.has_version(version_id):

749

808

# First check: the list of parents.

750

809

my_parents = self.get_parents_with_ghosts(version_id)

751

if my_parents != parents:

810

if tuple(my_parents) != tuple(parents):

752

811

# XXX: KnitCorrupt is not quite the right exception here.

753

812

raise KnitCorrupt(

754

813

self.filename,

807

866

factory = KnitAnnotateFactory()

808

867

else:

809

868

raise errors.KnitDataStreamUnknown(format)

810

index = _StreamIndex(data_list)

869

index = _StreamIndex(data_list, self._index)

811

870

access = _StreamAccess(reader_callable, index, self, factory)

812

871

return KnitVersionedFile(self.filename, self.transport,

813

872

factory=factory, index=index, access_method=access)

874

933

875

934

This data is intended to be used for retrieving the knit records.

876

935

877

A dict of version_id to (method, data_pos, data_size, next) is

936

A dict of version_id to (record_details, index_memo, next, parents) is

878

937

returned.

879

938

method is the way referenced data should be applied.

880

data_pos is the position of the data in the knit.

881

data_size is the size of the data in the knit.

939

index_memo is the handle to pass to the data access to actually get the

940

data

882

941

next is the build-parent of the version, or None for fulltexts.

942

parents is the version_ids of the parents of this version

883

943

"""

884

944

component_data = {}

885

for version_id in version_ids:

886

cursor = version_id

887

888

while cursor is not None and cursor not in component_data:

889

method = self._index.get_method(cursor)

890

if method == 'fulltext':

891

next = None

892

else:

893

next = self.get_parents_with_ghosts(cursor)[0]

894

index_memo = self._index.get_position(cursor)

895

component_data[cursor] = (method, index_memo, next)

896

cursor = next

945

pending_components = version_ids

946

while pending_components:

947

build_details = self._index.get_build_details(pending_components)

948

current_components = set(pending_components)

949

pending_components = set()

950

for version_id, details in build_details.iteritems():

951

(index_memo, compression_parent, parents,

952

record_details) = details

953

method = record_details[0]

954

if compression_parent is not None:

955

pending_components.add(compression_parent)

956

component_data[version_id] = (record_details, index_memo,

957

compression_parent)

958

missing = current_components.difference(build_details)

959

if missing:

960

raise errors.RevisionNotPresent(missing.pop(), self.filename)

897

961

return component_data

898

962

899

963

def _get_content(self, version_id, parent_texts={}):

913

977

self._index.check_versions_present(version_ids)

914

978

915

979

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

916

nostore_sha, random_id, check_content):

980

nostore_sha, random_id, check_content, left_matching_blocks):

917

981

"""See VersionedFile.add_lines_with_ghosts()."""

918

982

self._check_add(version_id, lines, random_id, check_content)

919

983

return self._add(version_id, lines, parents, self.delta,

920

parent_texts, None, nostore_sha, random_id)

984

parent_texts, left_matching_blocks, nostore_sha, random_id)

921

985

922

986

def _add_lines(self, version_id, parents, lines, parent_texts,

923

987

left_matching_blocks, nostore_sha, random_id, check_content):

1037

1101

def _get_record_map(self, version_ids):

1038

1102

"""Produce a dictionary of knit records.

1039

1103

1040

The keys are version_ids, the values are tuples of (method, content,

1041

digest, next).

1042

method is the way the content should be applied.

1043

content is a KnitContent object.

1044

digest is the SHA1 digest of this version id after all steps are done

1045

next is the build-parent of the version, i.e. the leftmost ancestor.

1046

If the method is fulltext, next will be None.

1104

:return: {version_id:(record, record_details, digest, next)}

1105

record

1106

data returned from read_records

1107

record_details

1108

opaque information to pass to parse_record

1109

digest

1110

SHA1 digest of the full text after all steps are done

1111

1112

build-parent of the version, i.e. the leftmost ancestor.

1113

Will be None if the record is not a delta.

1047

1114

"""

1048

1115

position_map = self._get_components_positions(version_ids)

1049

# c = component_id, m = method, i_m = index_memo, n = next

1050

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

1116

# c = component_id, r = record_details, i_m = index_memo, n = next

1117

records = [(c, i_m) for c, (r, i_m, n)

1118

in position_map.iteritems()]

1051

1119

record_map = {}

1052

for component_id, content, digest in \

1120

for component_id, record, digest in \

1053

1121

self._data.read_records_iter(records):

1054

method, index_memo, next = position_map[component_id]

1055

record_map[component_id] = method, content, digest, next

1056

1122

(record_details, index_memo, next) = position_map[component_id]

1123

record_map[component_id] = record, record_details, digest, next

1124

1057

1125

return record_map

1058

1126

1059

1127

def get_text(self, version_id):

1094

1162

components = []

1095

1163

cursor = version_id

1096

1164

while cursor is not None:

1097

method, data, digest, next = record_map[cursor]

1098

components.append((cursor, method, data, digest))

1165

record, record_details, digest, next = record_map[cursor]

1166

components.append((cursor, record, record_details, digest))

1099

1167

if cursor in content_map:

1100

1168

break

1101

1169

cursor = next

1102

1170

1103

1171

content = None

1104

for component_id, method, data, digest in reversed(components):

1172

for (component_id, record, record_details,

1173

digest) in reversed(components):

1105

1174

if component_id in content_map:

1106

1175

content = content_map[component_id]

1107

1176

else:

1108

if method == 'fulltext':

1109

assert content is None

1110

content = self.factory.parse_fulltext(data, version_id)

1111

elif method == 'line-delta':

1112

delta = self.factory.parse_line_delta(data, version_id)

1113

if multiple_versions:

1114

# only doing this when we want multiple versions

1115

# output avoids list copies - which reference and

1116

# dereference many strings.

1117

content = content.copy()

1118

content.apply_delta(delta, version_id)

1177

content, delta = self.factory.parse_record(version_id,

1178

record, record_details, content,

1179

copy_base_content=multiple_versions)

1119

1180

if multiple_versions:

1120

1181

content_map[component_id] = content

1121

1182

1122

if 'no-eol' in self._index.get_options(version_id):

1123

if multiple_versions:

1124

content = content.copy()

1125

content.strip_last_line_newline()

1183

content.cleanup_eol(copy_on_mutate=multiple_versions)

1126

1184

final_content[version_id] = content

1127

1185

1128

1186

# digest here is the digest from the last applied component.

1200

1258

"""See VersionedFile.annotate_iter."""

1201

1259

return self.factory.annotate_iter(self, version_id)

1202

1260

1203

def get_parents(self, version_id):

1204

"""See VersionedFile.get_parents."""

1205

# perf notes:

1206

# optimism counts!

1207

# 52554 calls in 1264 872 internal down from 3674

1208

try:

1209

return self._index.get_parents(version_id)

1210

except KeyError:

1211

raise RevisionNotPresent(version_id, self.filename)

1212

1213

def get_parents_with_ghosts(self, version_id):

1214

"""See VersionedFile.get_parents."""

1215

try:

1216

return self._index.get_parents_with_ghosts(version_id)

1217

except KeyError:

1218

raise RevisionNotPresent(version_id, self.filename)

1261

def get_parent_map(self, version_ids):

1262

"""See VersionedFile.get_parent_map."""

1263

return self._index.get_parent_map(version_ids)

1219

1264

1220

1265

def get_ancestry(self, versions, topo_sorted=True):

1221

1266

"""See VersionedFile.get_ancestry."""

1426

1471

graph[version] = parents

1427

1472

return topo_sort(graph.items())

1428

1473

1474

def get_build_details(self, version_ids):

1475

"""Get the method, index_memo and compression parent for version_ids.

1476

1477

Ghosts are omitted from the result.

1478

1479

:param version_ids: An iterable of version_ids.

1480

:return: A dict of version_id:(index_memo, compression_parent,

1481

parents, record_details).

1482

index_memo

1483

opaque structure to pass to read_records to extract the raw

1484

data

1485

compression_parent

1486

Content that this record is built upon, may be None

1487

parents

1488

Logical parents of this node

1489

record_details

1490

extra information about the content which needs to be passed to

1491

Factory.parse_record

1492

"""

1493

result = {}

1494

for version_id in version_ids:

1495

if version_id not in self._cache:

1496

# ghosts are omitted

1497

continue

1498

method = self.get_method(version_id)

1499

parents = self.get_parents_with_ghosts(version_id)

1500

if method == 'fulltext':

1501

compression_parent = None

1502

else:

1503

compression_parent = parents[0]

1504

noeol = 'no-eol' in self.get_options(version_id)

1505

index_memo = self.get_position(version_id)

1506

result[version_id] = (index_memo, compression_parent,

1507

parents, (method, noeol))

1508

return result

1509

1429

1510

def iter_parents(self, version_ids):

1430

1511

"""Iterate through the parents for many version ids.

1431

1512

1435

1516

The order is undefined, allowing for different optimisations in

1436

1517

the underlying implementation.

1437

1518

"""

1438

for version_id in version_ids:

1439

try:

1440

yield version_id, tuple(self.get_parents(version_id))

1441

except KeyError:

1442

pass

1519

parent_map = self.get_parent_map(version_ids)

1520

parent_map_set = set(parent_map)

1521

unknown_existence = set()

1522

for parents in parent_map.itervalues():

1523

unknown_existence.update(parents)

1524

unknown_existence.difference_update(parent_map_set)

1525

present_parents = set(self.get_parent_map(unknown_existence))

1526

present_parents.update(parent_map_set)

1527

for version_id, parents in parent_map.iteritems():

1528

parents = tuple(parent for parent in parents

1529

if parent in present_parents)

1530

yield version_id, parents

1443

1531

1444

1532

def num_versions(self):

1445

1533

return len(self._history)

1488

1576

assert isinstance(line, str), \

1489

1577

'content must be utf-8 encoded: %r' % (line,)

1490

1578

lines.append(line)

1491

self._cache_version(version_id, options, pos, size, parents)

1579

self._cache_version(version_id, options, pos, size, tuple(parents))

1492

1580

if not self._need_to_create:

1493

1581

self._transport.append_bytes(self._filename, ''.join(lines))

1494

1582

else:

1543

1631

"""

1544

1632

return self._cache[version_id][1]

1545

1633

1546

def get_parents(self, version_id):

1547

"""Return parents of specified version ignoring ghosts."""

1548

return [parent for parent in self._cache[version_id][4]

1549

if parent in self._cache]

1634

def get_parent_map(self, version_ids):

1635

"""Passed through to by KnitVersionedFile.get_parent_map."""

1636

result = {}

1637

for version_id in version_ids:

1638

try:

1639

result[version_id] = tuple(self._cache[version_id][4])

1640

except KeyError:

1641

pass

1642

return result

1550

1643

1551

1644

def get_parents_with_ghosts(self, version_id):

1552

1645

"""Return parents of specified version with ghosts."""

1553

return self._cache[version_id][4]

1646

try:

1647

return self.get_parent_map([version_id])[version_id]

1648

except KeyError:

1649

raise RevisionNotPresent(version_id, self)

1554

1650

1555

1651

def check_versions_present(self, version_ids):

1556

1652

"""Check that all specified versions are present."""

1689

1785

result_keys = topo_sort(graph.items())

1690

1786

return [key[0] for key in result_keys]

1691

1787

1788

def get_build_details(self, version_ids):

1789

"""Get the method, index_memo and compression parent for version_ids.

1790

1791

Ghosts are omitted from the result.

1792

1793

:param version_ids: An iterable of version_ids.

1794

:return: A dict of version_id:(index_memo, compression_parent,

1795

parents, record_details).

1796

index_memo

1797

opaque structure to pass to read_records to extract the raw

1798

data

1799

compression_parent

1800

Content that this record is built upon, may be None

1801

parents

1802

Logical parents of this node

1803

record_details

1804

extra information about the content which needs to be passed to

1805

Factory.parse_record

1806

"""

1807

result = {}

1808

entries = self._get_entries(self._version_ids_to_keys(version_ids), True)

1809

for entry in entries:

1810

version_id = self._keys_to_version_ids((entry[1],))[0]

1811

if not self._parents:

1812

parents = ()

1813

else:

1814

parents = self._keys_to_version_ids(entry[3][0])

1815

if not self._deltas:

1816

compression_parent = None

1817

else:

1818

compression_parent_key = self._compression_parent(entry)

1819

if compression_parent_key:

1820

compression_parent = self._keys_to_version_ids(

1821

(compression_parent_key,))[0]

1822

else:

1823

compression_parent = None

1824

noeol = (entry[2][0] == 'N')

1825

if compression_parent:

1826

method = 'line-delta'

1827

else:

1828

method = 'fulltext'

1829

result[version_id] = (self._node_to_position(entry),

1830

compression_parent, parents,

1831

(method, noeol))

1832

return result

1833

1834

def _compression_parent(self, an_entry):

1835

# return the key that an_entry is compressed against, or None

1836

# Grab the second parent list (as deltas implies parents currently)

1837

compression_parents = an_entry[3][1]

1838

if not compression_parents:

1839

return None

1840

assert len(compression_parents) == 1

1841

return compression_parents[0]

1842

1843

def _get_method(self, node):

1844

if not self._deltas:

1845

return 'fulltext'

1846

if self._compression_parent(node):

1847

return 'line-delta'

1848

else:

1849

return 'fulltext'

1850

1692

1851

def get_graph(self):

1693

1852

"""Return a list of the node:parents lists from this knit index."""

1694

1853

if not self._parents:

1750

1909

logic to get the record.

1751

1910

"""

1752

1911

node = self._get_node(version_id)

1912

return self._node_to_position(node)

1913

1914

def _node_to_position(self, node):

1915

"""Convert an index value to position details."""

1753

1916

bits = node[2][1:].split(' ')

1754

1917

return node[0], int(bits[0]), int(bits[1])

1755

1918

1756

1919

def get_method(self, version_id):

1757

1920

"""Return compression method of specified version."""

1758

if not self._deltas:

1759

return 'fulltext'

1760

return self._parent_compression(self._get_node(version_id)[3][1])

1761

1762

def _parent_compression(self, reference_list):

1763

# use the second reference list to decide if this is delta'd or not.

1764

if len(reference_list):

1765

return 'line-delta'

1766

else:

1767

return 'fulltext'

1921

return self._get_method(self._get_node(version_id))

1768

1922

1769

1923

def _get_node(self, version_id):

1770

1924

try:

1778

1932

e.g. ['foo', 'bar']

1779

1933

"""

1780

1934

node = self._get_node(version_id)

1781

if not self._deltas:

1782

options = ['fulltext']

1783

else:

1784

options = [self._parent_compression(node[3][1])]

1935

options = [self._get_method(node)]

1785

1936

if node[2][0] == 'N':

1786

1937

options.append('no-eol')

1787

1938

return options

1788

1939

1789

def get_parents(self, version_id):

1790

"""Return parents of specified version ignoring ghosts."""

1791

parents = list(self.iter_parents([version_id]))

1792

if not parents:

1793

# missing key

1794

raise errors.RevisionNotPresent(version_id, self)

1795

return parents[0][1]

1940

def get_parent_map(self, version_ids):

1941

"""Passed through to by KnitVersionedFile.get_parent_map."""

1942

nodes = self._get_entries(self._version_ids_to_keys(version_ids))

1943

result = {}

1944

if self._parents:

1945

for node in nodes:

1946

result[node[1][0]] = self._keys_to_version_ids(node[3][0])

1947

else:

1948

for node in nodes:

1949

result[node[1][0]] = ()

1950

return result

1796

1951

1797

1952

def get_parents_with_ghosts(self, version_id):

1798

1953

"""Return parents of specified version with ghosts."""

1799

nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),

1800

check_present=True))

1801

if not self._parents:

1802

return ()

1803

return self._keys_to_version_ids(nodes[0][3][0])

1954

try:

1955

return self.get_parent_map([version_id])[version_id]

1956

except KeyError:

1957

raise RevisionNotPresent(version_id, self)

1804

1958

1805

1959

def check_versions_present(self, version_ids):

1806

1960

"""Check that all specified versions are present."""

2124

2278

class _StreamIndex(object):

2125

2279

"""A Knit Index object that uses the data map from a datastream."""

2126

2280

2127

def __init__(self, data_list):

2281

def __init__(self, data_list, backing_index):

2128

2282

"""Create a _StreamIndex object.

2129

2283

2130

2284

:param data_list: The data_list from the datastream.

2285

:param backing_index: The index which will supply values for nodes

2286

referenced outside of this stream.

2131

2287

"""

2132

2288

self.data_list = data_list

2289

self.backing_index = backing_index

2133

2290

self._by_version = {}

2134

2291

pos = 0

2135

2292

for key, options, length, parents in data_list:

2159

2316

ancestry.add(version)

2160

2317

return list(ancestry)

2161

2318

2319

def get_build_details(self, version_ids):

2320

"""Get the method, index_memo and compression parent for version_ids.

2321

2322

Ghosts are omitted from the result.

2323

2324

:param version_ids: An iterable of version_ids.

2325

:return: A dict of version_id:(index_memo, compression_parent,

2326

parents, record_details).

2327

index_memo

2328

opaque structure to pass to read_records to extract the raw

2329

data

2330

compression_parent

2331

Content that this record is built upon, may be None

2332

parents

2333

Logical parents of this node

2334

record_details

2335

extra information about the content which needs to be passed to

2336

Factory.parse_record

2337

"""

2338

result = {}

2339

for version_id in version_ids:

2340

try:

2341

method = self.get_method(version_id)

2342

except errors.RevisionNotPresent:

2343

# ghosts are omitted

2344

continue

2345

parent_ids = self.get_parents_with_ghosts(version_id)

2346

noeol = ('no-eol' in self.get_options(version_id))

2347

if method == 'fulltext':

2348

compression_parent = None

2349

else:

2350

compression_parent = parent_ids[0]

2351

index_memo = self.get_position(version_id)

2352

result[version_id] = (index_memo, compression_parent,

2353

parent_ids, (method, noeol))

2354

return result

2355

2162

2356

def get_method(self, version_id):

2163

2357

"""Return compression method of specified version."""

2164

2358

try:

2166

2360

except KeyError:

2167

2361

# Strictly speaking this should check in the backing knit, but

2168

2362

# until we have a test to discriminate, this will do.

2169

return 'fulltext'

2363

return self.backing_index.get_method(version_id)

2170

2364

if 'fulltext' in options:

2171

2365

return 'fulltext'

2172

2366

elif 'line-delta' in options:

2179

2373

2180

2374

e.g. ['foo', 'bar']

2181

2375

"""

2182

return self._by_version[version_id][0]

2376

try:

2377

return self._by_version[version_id][0]

2378

except KeyError:

2379

return self.backing_index.get_options(version_id)

2380

2381

def get_parent_map(self, version_ids):

2382

"""Passed through to by KnitVersionedFile.get_parent_map."""

2383

result = {}

2384

pending_ids = set()

2385

for version_id in version_ids:

2386

try:

2387

result[version_id] = self._by_version[version_id][2]

2388

except KeyError:

2389

pending_ids.add(version_id)

2390

result.update(self.backing_index.get_parent_map(pending_ids))

2391

return result

2183

2392

2184

2393

def get_parents_with_ghosts(self, version_id):

2185

2394

"""Return parents of specified version with ghosts."""

2186

return self._by_version[version_id][2]

2395

try:

2396

return self.get_parent_map([version_id])[version_id]

2397

except KeyError:

2398

raise RevisionNotPresent(version_id, self)

2187

2399

2188

2400

def get_position(self, version_id):

2189

2401

"""Return details needed to access the version.

2639

2851

# do the join:

2640

2852

count = 0

2641

2853

total = len(version_list)

2854

parent_map = self.source.get_parent_map(version_list)

2642

2855

for version_id in version_list:

2643

2856

pb.update("Converting to knit", count, total)

2644

parents = self.source.get_parents(version_id)

2857

parents = parent_map[version_id]

2645

2858

# check that its will be a consistent copy:

2646

2859

for parent in parents:

2647

2860

# if source has the parent, we must already have it

2668

2881

It will work for knits with cached annotations, but this is not

2669

2882

recommended.

2670

2883

"""

2671

ancestry = knit.get_ancestry(revision_id)

2672

fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))

2673

annotations = {}

2674

for candidate in ancestry:

2675

if candidate in annotations:

2676

continue

2677

parents = knit.get_parents(candidate)

2678

if len(parents) == 0:

2679

blocks = None

2680

elif knit._index.get_method(candidate) != 'line-delta':

2681

blocks = None

2884

annotator = _KnitAnnotator(knit)

2885

return iter(annotator.annotate(revision_id))

2886

2887

2888

class _KnitAnnotator(object):

2889

"""Build up the annotations for a text."""

2890

2891

def __init__(self, knit):

2892

self._knit = knit

2893

2894

# Content objects, differs from fulltexts because of how final newlines

2895

# are treated by knits. the content objects here will always have a

2896

# final newline

2897

self._fulltext_contents = {}

2898

2899

# Annotated lines of specific revisions

2900

self._annotated_lines = {}

2901

2902

# Track the raw data for nodes that we could not process yet.

2903

# This maps the revision_id of the base to a list of children that will

2904

# annotated from it.

2905

self._pending_children = {}

2906

2907

# Nodes which cannot be extracted

2908

self._ghosts = set()

2909

2910

# Track how many children this node has, so we know if we need to keep

2911

# it

2912

self._annotate_children = {}

2913

self._compression_children = {}

2914

2915

self._all_build_details = {}

2916

# The children => parent revision_id graph

2917

self._revision_id_graph = {}

2918

2919

self._heads_provider = None

2920

2921

self._nodes_to_keep_annotations = set()

2922

self._generations_until_keep = 100

2923

2924

def set_generations_until_keep(self, value):

2925

"""Set the number of generations before caching a node.

2926

2927

Setting this to -1 will cache every merge node, setting this higher

2928

will cache fewer nodes.

2929

"""

2930

self._generations_until_keep = value

2931

2932

def _add_fulltext_content(self, revision_id, content_obj):

2933

self._fulltext_contents[revision_id] = content_obj

2934

# TODO: jam 20080305 It might be good to check the sha1digest here

2935

return content_obj.text()

2936

2937

def _check_parents(self, child, nodes_to_annotate):

2938

"""Check if all parents have been processed.

2939

2940

:param child: A tuple of (rev_id, parents, raw_content)

2941

:param nodes_to_annotate: If child is ready, add it to

2942

nodes_to_annotate, otherwise put it back in self._pending_children

2943

"""

2944

for parent_id in child[1]:

2945

if (parent_id not in self._annotated_lines):

2946

# This parent is present, but another parent is missing

2947

self._pending_children.setdefault(parent_id,

2948

[]).append(child)

2949

break

2682

2950

else:

2683

parent, sha1, noeol, delta = knit.get_delta(candidate)

2684

blocks = KnitContent.get_line_delta_blocks(delta,

2685

fulltext[parents[0]], fulltext[candidate])

2686

annotations[candidate] = list(annotate.reannotate([annotations[p]

2687

for p in parents], fulltext[candidate], candidate, blocks))

2688

return iter(annotations[revision_id])

2951

# This one is ready to be processed

2952

nodes_to_annotate.append(child)

2953

2954

def _add_annotation(self, revision_id, fulltext, parent_ids,

2955

left_matching_blocks=None):

2956

"""Add an annotation entry.

2957

2958

All parents should already have been annotated.

2959

:return: A list of children that now have their parents satisfied.

2960

"""

2961

a = self._annotated_lines

2962

annotated_parent_lines = [a[p] for p in parent_ids]

2963

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

2964

fulltext, revision_id, left_matching_blocks,

2965

heads_provider=self._get_heads_provider()))

2966

self._annotated_lines[revision_id] = annotated_lines

2967

for p in parent_ids:

2968

ann_children = self._annotate_children[p]

2969

ann_children.remove(revision_id)

2970

if (not ann_children

2971

and p not in self._nodes_to_keep_annotations):

2972

del self._annotated_lines[p]

2973

del self._all_build_details[p]

2974

if p in self._fulltext_contents:

2975

del self._fulltext_contents[p]

2976

# Now that we've added this one, see if there are any pending

2977

# deltas to be done, certainly this parent is finished

2978

nodes_to_annotate = []

2979

for child in self._pending_children.pop(revision_id, []):

2980

self._check_parents(child, nodes_to_annotate)

2981

return nodes_to_annotate

2982

2983

def _get_build_graph(self, revision_id):

2984

"""Get the graphs for building texts and annotations.

2985

2986

The data you need for creating a full text may be different than the

2987

data you need to annotate that text. (At a minimum, you need both

2988

parents to create an annotation, but only need 1 parent to generate the

2989

fulltext.)

2990

2991

:return: A list of (revision_id, index_memo) records, suitable for

2992

passing to read_records_iter to start reading in the raw data fro/

2993

the pack file.

2994

"""

2995

if revision_id in self._annotated_lines:

2996

# Nothing to do

2997

return []

2998

pending = set([revision_id])

2999

records = []

3000

generation = 0

3001

kept_generation = 0

3002

while pending:

3003

# get all pending nodes

3004

generation += 1

3005

this_iteration = pending

3006

build_details = self._knit._index.get_build_details(this_iteration)

3007

self._all_build_details.update(build_details)

3008

# new_nodes = self._knit._index._get_entries(this_iteration)

3009

pending = set()

3010

for rev_id, details in build_details.iteritems():

3011

(index_memo, compression_parent, parents,

3012

record_details) = details

3013

self._revision_id_graph[rev_id] = parents

3014

records.append((rev_id, index_memo))

3015

# Do we actually need to check _annotated_lines?

3016

pending.update(p for p in parents

3017

if p not in self._all_build_details)

3018

if compression_parent:

3019

self._compression_children.setdefault(compression_parent,

3020

[]).append(rev_id)

3021

if parents:

3022

for parent in parents:

3023

self._annotate_children.setdefault(parent,

3024

[]).append(rev_id)

3025

num_gens = generation - kept_generation

3026

if ((num_gens >= self._generations_until_keep)

3027

and len(parents) > 1):

3028

kept_generation = generation

3029

self._nodes_to_keep_annotations.add(rev_id)

3030

3031

missing_versions = this_iteration.difference(build_details.keys())

3032

self._ghosts.update(missing_versions)

3033

for missing_version in missing_versions:

3034

# add a key, no parents

3035

self._revision_id_graph[missing_version] = ()

3036

pending.discard(missing_version) # don't look for it

3037

# XXX: This should probably be a real exception, as it is a data

3038

# inconsistency

3039

assert not self._ghosts.intersection(self._compression_children), \

3040

"We cannot have nodes which have a compression parent of a ghost."

3041

# Cleanout anything that depends on a ghost so that we don't wait for

3042

# the ghost to show up

3043

for node in self._ghosts:

3044

if node in self._annotate_children:

3045

# We won't be building this node

3046

del self._annotate_children[node]

3047

# Generally we will want to read the records in reverse order, because

3048

# we find the parent nodes after the children

3049

records.reverse()

3050

return records

3051

3052

def _annotate_records(self, records):

3053

"""Build the annotations for the listed records."""

3054

# We iterate in the order read, rather than a strict order requested

3055

# However, process what we can, and put off to the side things that

3056

# still need parents, cleaning them up when those parents are

3057

# processed.

3058

for (rev_id, record,

3059

digest) in self._knit._data.read_records_iter(records):

3060

if rev_id in self._annotated_lines:

3061

continue

3062

parent_ids = self._revision_id_graph[rev_id]

3063

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3064

details = self._all_build_details[rev_id]

3065

(index_memo, compression_parent, parents,

3066

record_details) = details

3067

nodes_to_annotate = []

3068

# TODO: Remove the punning between compression parents, and

3069

# parent_ids, we should be able to do this without assuming

3070

# the build order

3071

if len(parent_ids) == 0:

3072

# There are no parents for this node, so just add it

3073

# TODO: This probably needs to be decoupled

3074

assert compression_parent is None

3075

fulltext_content, delta = self._knit.factory.parse_record(

3076

rev_id, record, record_details, None)

3077

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3078

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3079

parent_ids, left_matching_blocks=None))

3080

else:

3081

child = (rev_id, parent_ids, record)

3082

# Check if all the parents are present

3083

self._check_parents(child, nodes_to_annotate)

3084

while nodes_to_annotate:

3085

# Should we use a queue here instead of a stack?

3086

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3087

(index_memo, compression_parent, parents,

3088

record_details) = self._all_build_details[rev_id]

3089

if compression_parent is not None:

3090

comp_children = self._compression_children[compression_parent]

3091

assert rev_id in comp_children

3092

# If there is only 1 child, it is safe to reuse this

3093

# content

3094

reuse_content = (len(comp_children) == 1

3095

and compression_parent not in

3096

self._nodes_to_keep_annotations)

3097

if reuse_content:

3098

# Remove it from the cache since it will be changing

3099

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3100

# Make sure to copy the fulltext since it might be

3101

# modified

3102

parent_fulltext = list(parent_fulltext_content.text())

3103

else:

3104

parent_fulltext_content = self._fulltext_contents[compression_parent]

3105

parent_fulltext = parent_fulltext_content.text()

3106

comp_children.remove(rev_id)

3107

fulltext_content, delta = self._knit.factory.parse_record(

3108

rev_id, record, record_details,

3109

parent_fulltext_content,

3110

copy_base_content=(not reuse_content))

3111

fulltext = self._add_fulltext_content(rev_id,

3112

fulltext_content)

3113

blocks = KnitContent.get_line_delta_blocks(delta,

3114

parent_fulltext, fulltext)

3115

else:

3116

fulltext_content = self._knit.factory.parse_fulltext(

3117

record, rev_id)

3118

fulltext = self._add_fulltext_content(rev_id,

3119

fulltext_content)

3120

blocks = None

3121

nodes_to_annotate.extend(

3122

self._add_annotation(rev_id, fulltext, parent_ids,

3123

left_matching_blocks=blocks))

3124

3125

def _get_heads_provider(self):

3126

"""Create a heads provider for resolving ancestry issues."""

3127

if self._heads_provider is not None:

3128

return self._heads_provider

3129

parent_provider = _mod_graph.DictParentsProvider(

3130

self._revision_id_graph)

3131

graph_obj = _mod_graph.Graph(parent_provider)

3132

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

3133

self._heads_provider = head_cache

3134

return head_cache

3135

3136

def annotate(self, revision_id):

3137

"""Return the annotated fulltext at the given revision.

3138

3139

:param revision_id: The revision id for this file

3140

"""

3141

records = self._get_build_graph(revision_id)

3142

if revision_id in self._ghosts:

3143

raise errors.RevisionNotPresent(revision_id, self._knit)

3144

self._annotate_records(records)

3145

return self._annotated_lines[revision_id]

2689

3146

2690

3147

2691

3148

try:

Older »