/brz/remove-bazaar : revision 4454.3.23

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2009-06-18 22:07:42 UTC
mto: This revision was merged to the branch mainline in revision 4522.
Revision ID: john@arbash-meinel.com-20090618220742-10czy03gxzh337fl

Initial attempt at refactoring _KnitAnnotator to derive from Annotator.

So far it seems possible, by just overriding _get_needed_texts.
However, we'll also want to override the get_matching_blocks stuff based on the delta.

files modified:
bzrlib/_annotator_py.py

bzrlib/annotate.py

bzrlib/knit.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

3305

return iter(annotator.annotate(revision_id))

3306

3307

3308

class _KnitAnnotator(object):

3308

class _KnitAnnotator(annotate.Annotator):

3309

"""Build up the annotations for a text."""

3310

3311

def __init__(self, knit):

3312

self._knit = knit

3313

3314

# Content objects, differs from fulltexts because of how final newlines

3315

# are treated by knits. the content objects here will always have a

3316

# final newline

3317

self._fulltext_contents = {}

3318

3319

# Annotated lines of specific revisions

3320

self._annotated_lines = {}

3321

3322

# Track the raw data for nodes that we could not process yet.

3323

# This maps the revision_id of the base to a list of children that will

3324

# annotated from it.

3325

self._pending_children = {}

3326

3327

# Nodes which cannot be extracted

3328

self._ghosts = set()

3329

3330

# Track how many children this node has, so we know if we need to keep

3331

# it

3332

self._annotate_children = {}

3333

self._compression_children = {}

3311

def __init__(self, vf):

3312

annotate.Annotator.__init__(self, vf)

3313

3314

# TODO: handle Nodes which cannot be extracted

3315

# self._ghosts = set()

3316

3317

# Map from revision_id => left_matching_blocks, should be 'use once'

3318

self._left_matching_blocks = {}

3319

3320

# KnitContent objects

3321

self._content_objects = {}

3322

# The number of children that depend on this fulltext content object

3323

self._num_compression_children = {}

3334

3324

3335

3325

self._all_build_details = {}

3336

# The children => parent revision_id graph

3337

self._revision_id_graph = {}

3338

3339

self._heads_provider = None

3340

3341

self._nodes_to_keep_annotations = set()

3342

self._generations_until_keep = 100

3343

3344

def set_generations_until_keep(self, value):

3345

"""Set the number of generations before caching a node.

3346

3347

Setting this to -1 will cache every merge node, setting this higher

3348

will cache fewer nodes.

3349

"""

3350

self._generations_until_keep = value

3351

3352

def _add_fulltext_content(self, revision_id, content_obj):

3353

self._fulltext_contents[revision_id] = content_obj

3354

# TODO: jam 20080305 It might be good to check the sha1digest here

3355

return content_obj.text()

3356

3357

def _check_parents(self, child, nodes_to_annotate):

3358

"""Check if all parents have been processed.

3359

3360

:param child: A tuple of (rev_id, parents, raw_content)

3361

:param nodes_to_annotate: If child is ready, add it to

3362

nodes_to_annotate, otherwise put it back in self._pending_children

3363

"""

3364

for parent_id in child[1]:

3365

if (parent_id not in self._annotated_lines):

3366

# This parent is present, but another parent is missing

3367

self._pending_children.setdefault(parent_id,

3368

[]).append(child)

3369

break

3370

else:

3371

# This one is ready to be processed

3372

nodes_to_annotate.append(child)

3373

3374

def _add_annotation(self, revision_id, fulltext, parent_ids,

3375

left_matching_blocks=None):

3376

"""Add an annotation entry.

3377

3378

All parents should already have been annotated.

3379

:return: A list of children that now have their parents satisfied.

3380

"""

3381

a = self._annotated_lines

3382

annotated_parent_lines = [a[p] for p in parent_ids]

3383

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

3384

fulltext, revision_id, left_matching_blocks,

3385

heads_provider=self._get_heads_provider()))

3386

self._annotated_lines[revision_id] = annotated_lines

3387

for p in parent_ids:

3388

ann_children = self._annotate_children[p]

3389

ann_children.remove(revision_id)

3390

if (not ann_children

3391

and p not in self._nodes_to_keep_annotations):

3392

del self._annotated_lines[p]

3393

del self._all_build_details[p]

3394

if p in self._fulltext_contents:

3395

del self._fulltext_contents[p]

3396

# Now that we've added this one, see if there are any pending

3397

# deltas to be done, certainly this parent is finished

3398

nodes_to_annotate = []

3399

for child in self._pending_children.pop(revision_id, []):

3400

self._check_parents(child, nodes_to_annotate)

3401

return nodes_to_annotate

3402

3326

3403

3327

def _get_build_graph(self, key):

3404

3328

"""Get the graphs for building texts and annotations.

3412

3336

passing to read_records_iter to start reading in the raw data from

3413

3337

the pack file.

3414

3338

"""

3415

if key in self._annotated_lines:

3416

# Nothing to do

3417

return []

3418

3339

pending = set([key])

3419

3340

records = []

3420

3341

generation = 0

3423

3344

# get all pending nodes

3424

3345

generation += 1

3425

3346

this_iteration = pending

3426

build_details = self._knit._index.get_build_details(this_iteration)

3347

build_details = self._vf._index.get_build_details(this_iteration)

3427

3348

self._all_build_details.update(build_details)

3428

# new_nodes = self._knit._index._get_entries(this_iteration)

3349

# new_nodes = self._vf._index._get_entries(this_iteration)

3429

3350

pending = set()

3430

3351

for key, details in build_details.iteritems():

3431

(index_memo, compression_parent, parents,

3352

(index_memo, compression_parent, parent_keys,

3432

3353

record_details) = details

3433

self._revision_id_graph[key] = parents

3354

self._parent_map[key] = parent_keys

3434

3355

records.append((key, index_memo))

3435

3356

# Do we actually need to check _annotated_lines?

3436

pending.update(p for p in parents

3357

pending.update(p for p in parent_keys

3437

3358

if p not in self._all_build_details)

3438

if compression_parent:

3439

self._compression_children.setdefault(compression_parent,

3440

[]).append(key)

3441

if parents:

3442

for parent in parents:

3443

self._annotate_children.setdefault(parent,

3444

[]).append(key)

3445

num_gens = generation - kept_generation

3446

if ((num_gens >= self._generations_until_keep)

3447

and len(parents) > 1):

3448

kept_generation = generation

3449

self._nodes_to_keep_annotations.add(key)

3359

if parent_keys:

3360

for parent_key in parent_keys:

3361

if parent_key in self._num_needed_children:

3362

self._num_needed_children[parent_key] += 1

3363

else:

3364

self._num_needed_children[parent_key] = 1

3450

3365

3451

3366

missing_versions = this_iteration.difference(build_details.keys())

3452

self._ghosts.update(missing_versions)

3453

for missing_version in missing_versions:

3454

# add a key, no parents

3455

self._revision_id_graph[missing_version] = ()

3456

pending.discard(missing_version) # don't look for it

3457

if self._ghosts.intersection(self._compression_children):

3458

raise KnitCorrupt(

3459

"We cannot have nodes which have a ghost compression parent:\n"

3460

"ghosts: %r\n"

3461

"compression children: %r"

3462

% (self._ghosts, self._compression_children))

3463

# Cleanout anything that depends on a ghost so that we don't wait for

3464

# the ghost to show up

3465

for node in self._ghosts:

3466

if node in self._annotate_children:

3467

# We won't be building this node

3468

del self._annotate_children[node]

3367

if missing_versions:

3368

raise ValueError('i dont handle ghosts')

3469

3369

# Generally we will want to read the records in reverse order, because

3470

3370

# we find the parent nodes after the children

3471

3371

records.reverse()

3472

3372

return records

3473

3373

3474

def _annotate_records(self, records):

3475

"""Build the annotations for the listed records."""

3374

def _get_needed_texts(self, key, pb=None):

3375

if len(self._vf._fallback_vfs) > 0:

3376

# If we have fallbacks, go to the generic path

3377

for v in super(_KnitAnnotator, self)._get_needed_texts(key, pb=pb):

3378

yield v

3379

while True:

3380

try:

3381

records = self._get_build_graph(key)

3382

for key, text, num_lines in self._extract_texts(records):

3383

yield key, text, num_lines

3384

except errors.RetryWithNewPacks, e:

3385

self._vf._access.reload_or_raise(e)

3386

# The cached build_details are no longer valid

3387

self._all_build_details.clear()

3388

3389

def _extract_texts(self, records):

3390

"""Extract the various texts needed based on records"""

3476

3391

# We iterate in the order read, rather than a strict order requested

3477

3392

# However, process what we can, and put off to the side things that

3478

3393

# still need parents, cleaning them up when those parents are

3479

3394

# processed.

3480

for (rev_id, record,

3481

digest) in self._knit._read_records_iter(records):

3482

if rev_id in self._annotated_lines:

3483

continue

3484

parent_ids = self._revision_id_graph[rev_id]

3485

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3486

details = self._all_build_details[rev_id]

3487

(index_memo, compression_parent, parents,

3395

# Children that we want to annotate as soon as we get the parent text

3396

# Map from parent_key => [child_key]

3397

pending_annotation = {}

3398

# Children that are missing their compression parent

3399

pending_deltas = {}

3400

for (key, record,

3401

digest) in self._vf._read_records_iter(records):

3402

# parent_ids = [p for p in parent_ids if p not in self._ghosts]

3403

details = self._all_build_details[keys]

3404

(index_memo, compression_parent, parent_keys,

3488

3405

record_details) = details

3489

nodes_to_annotate = []

3406

assert parent_keys == self._parent_map[key]

3490

3407

# TODO: Remove the punning between compression parents, and

3491

3408

# parent_ids, we should be able to do this without assuming

3492

3409

# the build order

3493

if len(parent_ids) == 0:

3494

# There are no parents for this node, so just add it

3495

# TODO: This probably needs to be decoupled

3496

fulltext_content, delta = self._knit._factory.parse_record(

3410

if compression_parent:

3411

# This is a delta, do we have the parent fulltext?

3412

if compression_parent not in self._fulltext_contents:

3413

# Waiting for the parent

3414

pending_deltas.setdefault(compression_parent, []).append(

3415

(key, parent_keys, record, record_details))

3416

continue

3417

# We can build this as a fulltext

3418

parent_content = self._content_objects[compression_parent]

3419

content, delta = self._vf._factory.parse_record(

3420

key, record, record_details,

3421

parent_content,

3422

# TODO: track when we can copy the base

3423

copy_base_content=True)

3424

else:

3425

# No compression parent means we have a fulltext

3426

content, delta = self._vf._factory.parse_record(

3497

3427

rev_id, record, record_details, None)

3498

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3499

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3500

parent_ids, left_matching_blocks=None))

3501

else:

3502

child = (rev_id, parent_ids, record)

3503

# Check if all the parents are present

3504

self._check_parents(child, nodes_to_annotate)

3505

while nodes_to_annotate:

3506

# Should we use a queue here instead of a stack?

3507

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3508

(index_memo, compression_parent, parents,

3509

record_details) = self._all_build_details[rev_id]

3510

blocks = None

3511

if compression_parent is not None:

3512

comp_children = self._compression_children[compression_parent]

3513

if rev_id not in comp_children:

3514

raise AssertionError("%r not in compression children %r"

3515

% (rev_id, comp_children))

3516

# If there is only 1 child, it is safe to reuse this

3517

# content

3518

reuse_content = (len(comp_children) == 1

3519

and compression_parent not in

3520

self._nodes_to_keep_annotations)

3521

if reuse_content:

3522

# Remove it from the cache since it will be changing

3523

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3524

# Make sure to copy the fulltext since it might be

3525

# modified

3526

parent_fulltext = list(parent_fulltext_content.text())

3428

self._content_objects[key] = content

3429

to_process = [(key, parent_keys, content)]

3430

# Check for compression children that we can expand

3431

if key in pending_deltas:

3432

children = pending_deltas.pop(key)

3433

for child_key, child_parent_keys, child_record, child_details in children:

3434

child_content, child_delta = self._vf._factory.parse_record(

3435

child_key, child_record, child_details,

3436

content,

3437

# TODO: track when we can copy the base

3438

copy_base_content=True)

3439

self._content_objects[child_key] = child_content

3440

to_process.append((child_key, child_parent_keys, child_content))

3441

while to_process:

3442

cur = to_process

3443

to_process = []

3444

for key, parent_keys, content in cur:

3445

# Check if all parents are present

3446

for parent_key in parent_keys:

3447

if parent_key not in self._annotations_cache:

3448

# still waiting on a parent text

3449

pending_annotation.setdefault(parent_key,

3450

[]).append((key, parent_keys, content))

3451

break

3527

3452

else:

3528

parent_fulltext_content = self._fulltext_contents[compression_parent]

3529

parent_fulltext = parent_fulltext_content.text()

3530

comp_children.remove(rev_id)

3531

fulltext_content, delta = self._knit._factory.parse_record(

3532

rev_id, record, record_details,

3533

parent_fulltext_content,

3534

copy_base_content=(not reuse_content))

3535

fulltext = self._add_fulltext_content(rev_id,

3536

fulltext_content)

3537

if compression_parent == parent_ids[0]:

3538

# the compression_parent is the left parent, so we can

3539

# re-use the delta

3540

blocks = KnitContent.get_line_delta_blocks(delta,

3541

parent_fulltext, fulltext)

3542

else:

3543

fulltext_content = self._knit._factory.parse_fulltext(

3544

record, rev_id)

3545

fulltext = self._add_fulltext_content(rev_id,

3546

fulltext_content)

3547

nodes_to_annotate.extend(

3548

self._add_annotation(rev_id, fulltext, parent_ids,

3549

left_matching_blocks=blocks))

3550

3551

def _get_heads_provider(self):

3552

"""Create a heads provider for resolving ancestry issues."""

3553

if self._heads_provider is not None:

3554

return self._heads_provider

3555

self._heads_provider = _mod_graph.KnownGraph(self._revision_id_graph)

3556

return self._heads_provider

3453

# All parents present

3454

lines = content.text()

3455

yield key, lines, len(lines)

3456

if key in pending_annotation:

3457

to_process.extend(pending_annotation.pop(key))

3557

3458

3558

3459

def annotate(self, key):

3559

3460

"""Return the annotated fulltext at the given key.

3560

3461

3561

3462

:param key: The key to annotate.

3562

3463

"""

3563

if len(self._knit._fallback_vfs) > 0:

3464

if len(self._vf._fallback_vfs) > 0:

3564

3465

# stacked knits can't use the fast path at present.

3565

3466

return self._simple_annotate(key)

3566

3467

while True:

3567

3468

try:

3568

3469

records = self._get_build_graph(key)

3569

3470

if key in self._ghosts:

3570

raise errors.RevisionNotPresent(key, self._knit)

3471

raise errors.RevisionNotPresent(key, self._vf)

3571

3472

self._annotate_records(records)

3572

3473

return self._annotated_lines[key]

3573

3474

except errors.RetryWithNewPacks, e:

3574

self._knit._access.reload_or_raise(e)

3475

self._vf._access.reload_or_raise(e)

3575

3476

# The cached build_details are no longer valid

3576

3477

self._all_build_details.clear()

3577

3478

3578

def _simple_annotate(self, key):

3579

"""Return annotated fulltext, rediffing from the full texts.

3580

3581

This is slow but makes no assumptions about the repository

3582

being able to produce line deltas.

3583

"""

3584

# TODO: this code generates a parent maps of present ancestors; it

3585

# could be split out into a separate method

3586

# -- mbp and robertc 20080704

3587

graph = _mod_graph.Graph(self._knit)

3588

parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])

3589

if v is not None)

3590

if not parent_map:

3591

raise errors.RevisionNotPresent(key, self)

3592

keys = parent_map.keys()

3593

heads_provider = _mod_graph.KnownGraph(parent_map)

3594

parent_cache = {}

3595

reannotate = annotate.reannotate

3596

for record in self._knit.get_record_stream(keys, 'topological', True):

3597

key = record.key

3598

fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

3599

parents = parent_map[key]

3600

if parents is not None:

3601

parent_lines = [parent_cache[parent] for parent in parent_map[key]]

3602

else:

3603

parent_lines = []

3604

parent_cache[key] = list(

3605

reannotate(parent_lines, fulltext, key, None, heads_provider))

3606

try:

3607

return parent_cache[key]

3608

except KeyError, e:

3609

raise errors.RevisionNotPresent(key, self._knit)

3610

3611

3479

3612

3480

try:

3613

3481

from bzrlib._knit_load_data_c import _load_data_c as _load_data

Older »