/brz/remove-bazaar : revision 4449.3.31

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2009-07-17 10:38:41 UTC
mfrom: (4536 +trunk)
mto: This revision was merged to the branch mainline in revision 4558.
Revision ID: mbp@sourcefrog.net-20090717103841-z35onk04bkiw7zb6

Merge trunk

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/test__annotator.py

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

tools/win32/bootstrap.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files renamed:
bzrlib/_btree_serializer_c.pyx => bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_c.h => bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_c.pyx => bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/inventory_implementations/ => bzrlib/tests/per_inventory/

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

doc/es/guia-desarrollador/ => doc/es/developer-guide/

doc/es/referencia-rapida/ => doc/es/quick-reference/

doc/es/referencia-rapida/referencia-rapida.svg => doc/es/quick-reference/quick-start-summary.svg

doc/es/notas-version/ => doc/es/release-notes/

doc/es/guia-usuario/ => doc/es/user-guide/

doc/es/referencia/ => doc/es/user-reference/

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/annotate.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/commands.py

bzrlib/config.py

bzrlib/dirstate.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/groupcompress.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lru_cache.py

bzrlib/merge.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/progress.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisiontree.py

bzrlib/send.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/switch.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/http_server.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/util/bencode.py

bzrlib/versionedfile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml8.py

doc/developers/inventory.txt

doc/en/tutorials/tutorial.txt

doc/es/mini-tutorial/index.txt

doc/es/quick-reference/Makefile

doc/index.es.txt

doc/index.txt

setup.py

tools/time_graph.py

tools/win32/build_release.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

from cStringIO import StringIO

from itertools import izip, chain

from itertools import izip

import operator

import os

import sys

664

665

see parse_fulltext which this inverts.

666

"""

667

# TODO: jam 20070209 We only do the caching thing to make sure that

668

# the origin is a valid utf-8 line, eventually we could remove it

669

667

return ['%s %s' % (o, t) for o, t in content._lines]

670

668

671

669

def lower_line_delta(self, delta):

686

684

content = knit._get_content(key)

687

685

# adjust for the fact that serialised annotations are only key suffixes

688

686

# for this factory.

689

if type(key) == tuple:

687

if type(key) is tuple:

690

688

prefix = key[:-1]

691

689

origins = content.annotate()

692

690

result = []

758

756

759

757

def annotate(self, knit, key):

760

758

annotator = _KnitAnnotator(knit)

761

return annotator.annotate(key)

759

return annotator.annotate_flat(key)

762

760

763

761

764

762

909

907

# indexes can't directly store that, so we give them

910

908

# an empty tuple instead.

911

909

parents = ()

910

line_bytes = ''.join(lines)

912

911

return self._add(key, lines, parents,

913

parent_texts, left_matching_blocks, nostore_sha, random_id)

912

parent_texts, left_matching_blocks, nostore_sha, random_id,

913

line_bytes=line_bytes)

914

915

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):

916

"""See VersionedFiles._add_text()."""

917

self._index._check_write_ok()

918

self._check_add(key, None, random_id, check_content=False)

919

if text.__class__ is not str:

920

raise errors.BzrBadParameterUnicode("text")

921

if parents is None:

922

# The caller might pass None if there is no graph data, but kndx

923

# indexes can't directly store that, so we give them

924

# an empty tuple instead.

925

parents = ()

926

return self._add(key, None, parents,

927

None, None, nostore_sha, random_id,

928

line_bytes=text)

914

929

915

930

def _add(self, key, lines, parents, parent_texts,

916

left_matching_blocks, nostore_sha, random_id):

931

left_matching_blocks, nostore_sha, random_id,

932

line_bytes):

917

933

"""Add a set of lines on top of version specified by parents.

918

934

919

935

Any versions not present will be converted into ghosts.

936

937

:param lines: A list of strings where each one is a single line (has a

938

single newline at the end of the string). This is now optional

939

(callers can pass None). It is left in its location for backwards

940

compatibility. If supplied, ''.join(lines) must == line_bytes

941

:param line_bytes: A single string containing the content

942

943

We pass both lines and line_bytes because different routes bring the

944

values to this function. And for memory efficiency, we don't want to

945

have to split/join on-demand.

920

946

"""

921

947

# first thing, if the content is something we don't need to store, find

922

948

# that out.

923

line_bytes = ''.join(lines)

924

949

digest = sha_string(line_bytes)

925

950

if nostore_sha == digest:

926

951

raise errors.ExistingContent

947

972

948

973

text_length = len(line_bytes)

949

974

options = []

950

if lines:

951

if lines[-1][-1] != '\n':

952

# copy the contents of lines.

975

no_eol = False

976

# Note: line_bytes is not modified to add a newline, that is tracked

977

# via the no_eol flag. 'lines' *is* modified, because that is the

978

# general values needed by the Content code.

979

if line_bytes and line_bytes[-1] != '\n':

980

options.append('no-eol')

981

no_eol = True

982

# Copy the existing list, or create a new one

983

if lines is None:

984

lines = osutils.split_lines(line_bytes)

985

else:

953

986

lines = lines[:]

954

options.append('no-eol')

955

lines[-1] = lines[-1] + '\n'

956

line_bytes += '\n'

987

# Replace the last line with one that ends in a final newline

988

lines[-1] = lines[-1] + '\n'

989

if lines is None:

990

lines = osutils.split_lines(line_bytes)

957

991

958

992

for element in key[:-1]:

959

if type(element) != str:

993

if type(element) is not str:

960

994

raise TypeError("key contains non-strings: %r" % (key,))

961

995

if key[-1] is None:

962

996

key = key[:-1] + ('sha1:' + digest,)

963

elif type(key[-1]) != str:

997

elif type(key[-1]) is not str:

964

998

raise TypeError("key contains non-strings: %r" % (key,))

965

999

# Knit hunks are still last-element only

966

1000

version_id = key[-1]

967

1001

content = self._factory.make(lines, version_id)

968

if 'no-eol' in options:

1002

if no_eol:

969

1003

# Hint to the content object that its text() call should strip the

970

1004

# EOL.

971

1005

content._should_strip_eol = True

986

1020

if self._factory.__class__ is KnitPlainFactory:

987

1021

# Use the already joined bytes saving iteration time in

988

1022

# _record_to_data.

1023

dense_lines = [line_bytes]

1024

if no_eol:

1025

dense_lines.append('\n')

989

1026

size, bytes = self._record_to_data(key, digest,

990

lines, [line_bytes])

1027

lines, dense_lines)

991

1028

else:

992

1029

# get mixed annotation + content and feed it into the

993

1030

# serialiser.

1005

1042

"""See VersionedFiles.annotate."""

1006

1043

return self._factory.annotate(self, key)

1007

1044

1045

def get_annotator(self):

1046

return _KnitAnnotator(self)

1047

1008

1048

def check(self, progress_bar=None):

1009

1049

"""See VersionedFiles.check()."""

1010

1050

# This doesn't actually test extraction of everything, but that will

1920

1960

function spends less time resizing the final string.

1921

1961

:return: (len, a StringIO instance with the raw data ready to read.)

1922

1962

"""

1923

# Note: using a string copy here increases memory pressure with e.g.

1924

# ISOs, but it is about 3 seconds faster on a 1.2GHz Intel machine

1925

# when doing the initial commit of a mozilla tree. RBC 20070921

1926

bytes = ''.join(chain(

1927

["version %s %d %s\n" % (key[-1],

1928

len(lines),

1929

digest)],

1930

dense_lines or lines,

1931

["end %s\n" % key[-1]]))

1932

if type(bytes) != str:

1933

raise AssertionError(

1934

'data must be plain bytes was %s' % type(bytes))

1963

chunks = ["version %s %d %s\n" % (key[-1], len(lines), digest)]

1964

chunks.extend(dense_lines or lines)

1965

chunks.append("end %s\n" % key[-1])

1966

for chunk in chunks:

1967

if type(chunk) is not str:

1968

raise AssertionError(

1969

'data must be plain bytes was %s' % type(chunk))

1935

1970

if lines and lines[-1][-1] != '\n':

1936

1971

raise ValueError('corrupt lines value %r' % lines)

1937

compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)

1972

compressed_bytes = tuned_gzip.chunks_to_gzip(chunks)

1938

1973

return len(compressed_bytes), compressed_bytes

1939

1974

1940

1975

def _split_header(self, line):

2005

2040

missing_keys.remove(record.key)

2006

2041

yield record

2007

2042

2008

self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,

2009

allow_missing=True)

2043

if self._raw_record_map is None:

2044

raise AssertionError('_raw_record_map should have been filled')

2010

2045

first = True

2011

2046

for key in self.keys:

2012

2047

if key in self.nonlocal_keys:

2375

2410

line = "\n%s %s %s %s %s :" % (

2376

2411

key[-1], ','.join(options), pos, size,

2377

2412

self._dictionary_compress(parents))

2378

if type(line) != str:

2413

if type(line) is not str:

2379

2414

raise AssertionError(

2380

2415

'data must be utf8 was %s' % type(line))

2381

2416

lines.append(line)

2570

2605

result = set()

2571

2606

# Identify all key prefixes.

2572

2607

# XXX: A bit hacky, needs polish.

2573

if type(self._mapper) == ConstantMapper:

2608

if type(self._mapper) is ConstantMapper:

2574

2609

prefixes = [()]

2575

2610

else:

2576

2611

relpaths = set()

2608

2643

del self._history

2609

2644

except NoSuchFile:

2610

2645

self._kndx_cache[prefix] = ({}, [])

2611

if type(self._mapper) == ConstantMapper:

2646

if type(self._mapper) is ConstantMapper:

2612

2647

# preserve behaviour for revisions.kndx etc.

2613

2648

self._init_index(path)

2614

2649

del self._cache

3094

3129

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

3095

3130

length), where the key is the record key.

3096

3131

"""

3097

if type(raw_data) != str:

3132

if type(raw_data) is not str:

3098

3133

raise AssertionError(

3099

3134

'data must be plain bytes was %s' % type(raw_data))

3100

3135

result = []

3183

3218

length), where the index field is the write_index object supplied

3184

3219

to the PackAccess object.

3185

3220

"""

3186

if type(raw_data) != str:

3221

if type(raw_data) is not str:

3187

3222

raise AssertionError(

3188

3223

'data must be plain bytes was %s' % type(raw_data))

3189

3224

result = []

3302

3337

recommended.

3303

3338

"""

3304

3339

annotator = _KnitAnnotator(knit)

3305

return iter(annotator.annotate(revision_id))

3306

3307

3308

class _KnitAnnotator(object):

3340

return iter(annotator.annotate_flat(revision_id))

3341

3342

3343

class _KnitAnnotator(annotate.Annotator):

3309

3344

"""Build up the annotations for a text."""

3310

3345

3311

def __init__(self, knit):

3312

self._knit = knit

3313

3314

# Content objects, differs from fulltexts because of how final newlines

3315

# are treated by knits. The content objects here will always have a

3316

# final newline

3317

self._fulltext_contents = {}

3318

3319

# Annotated lines of specific revisions

3320

self._annotated_lines = {}

3321

3322

# Track the raw data for nodes that we could not process yet.

3323

# This maps the revision_id of the base to a list of children that will

3324

# be annotated from it.

3325

self._pending_children = {}

3326

3327

# Nodes which cannot be extracted

3328

self._ghosts = set()

3329

3330

# Track how many children this node has, so we know if we need to keep

3331

# it

3332

self._annotate_children = {}

3333

self._compression_children = {}

3346

def __init__(self, vf):

3347

annotate.Annotator.__init__(self, vf)

3348

3349

# TODO: handle Nodes which cannot be extracted

3350

# self._ghosts = set()

3351

3352

# Map from (key, parent_key) => matching_blocks, should be 'use once'

3353

self._matching_blocks = {}

3354

3355

# KnitContent objects

3356

self._content_objects = {}

3357

# The number of children that depend on this fulltext content object

3358

self._num_compression_children = {}

3359

# Delta records that need their compression parent before they can be

3360

# expanded

3361

self._pending_deltas = {}

3362

# Fulltext records that are waiting for their parents' fulltexts before

3363

# they can be yielded for annotation

3364

self._pending_annotation = {}

3334

3365

3335

3366

self._all_build_details = {}

3336

# The children => parent revision_id graph

3337

self._revision_id_graph = {}

3338

3339

self._heads_provider = None

3340

3341

self._nodes_to_keep_annotations = set()

3342

self._generations_until_keep = 100

3343

3344

def set_generations_until_keep(self, value):

3345

"""Set the number of generations before caching a node.

3346

3347

Setting this to -1 will cache every merge node, setting this higher

3348

will cache fewer nodes.

3349

"""

3350

self._generations_until_keep = value

3351

3352

def _add_fulltext_content(self, revision_id, content_obj):

3353

self._fulltext_contents[revision_id] = content_obj

3354

# TODO: jam 20080305 It might be good to check the sha1digest here

3355

return content_obj.text()

3356

3357

def _check_parents(self, child, nodes_to_annotate):

3358

"""Check if all parents have been processed.

3359

3360

:param child: A tuple of (rev_id, parents, raw_content)

3361

:param nodes_to_annotate: If child is ready, add it to

3362

nodes_to_annotate, otherwise put it back in self._pending_children

3363

"""

3364

for parent_id in child[1]:

3365

if (parent_id not in self._annotated_lines):

3366

# This parent is present, but another parent is missing

3367

self._pending_children.setdefault(parent_id,

3368

[]).append(child)

3369

break

3370

else:

3371

# This one is ready to be processed

3372

nodes_to_annotate.append(child)

3373

3374

def _add_annotation(self, revision_id, fulltext, parent_ids,

3375

left_matching_blocks=None):

3376

"""Add an annotation entry.

3377

3378

All parents should already have been annotated.

3379

:return: A list of children that now have their parents satisfied.

3380

"""

3381

a = self._annotated_lines

3382

annotated_parent_lines = [a[p] for p in parent_ids]

3383

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

3384

fulltext, revision_id, left_matching_blocks,

3385

heads_provider=self._get_heads_provider()))

3386

self._annotated_lines[revision_id] = annotated_lines

3387

for p in parent_ids:

3388

ann_children = self._annotate_children[p]

3389

ann_children.remove(revision_id)

3390

if (not ann_children

3391

and p not in self._nodes_to_keep_annotations):

3392

del self._annotated_lines[p]

3393

del self._all_build_details[p]

3394

if p in self._fulltext_contents:

3395

del self._fulltext_contents[p]

3396

# Now that we've added this one, see if there are any pending

3397

# deltas to be done, certainly this parent is finished

3398

nodes_to_annotate = []

3399

for child in self._pending_children.pop(revision_id, []):

3400

self._check_parents(child, nodes_to_annotate)

3401

return nodes_to_annotate

3402

3367

3403

3368

def _get_build_graph(self, key):

3404

3369

"""Get the graphs for building texts and annotations.

3412

3377

passing to read_records_iter to start reading in the raw data from

3413

3378

the pack file.

3414

3379

"""

3415

if key in self._annotated_lines:

3416

# Nothing to do

3417

return []

3418

3380

pending = set([key])

3419

3381

records = []

3420

generation = 0

3421

kept_generation = 0

3382

ann_keys = set()

3383

self._num_needed_children[key] = 1

3422

3384

while pending:

3423

3385

# get all pending nodes

3424

generation += 1

3425

3386

this_iteration = pending

3426

build_details = self._knit._index.get_build_details(this_iteration)

3387

build_details = self._vf._index.get_build_details(this_iteration)

3427

3388

self._all_build_details.update(build_details)

3428

# new_nodes = self._knit._index._get_entries(this_iteration)

3389

# new_nodes = self._vf._index._get_entries(this_iteration)

3429

3390

pending = set()

3430

3391

for key, details in build_details.iteritems():

3431

(index_memo, compression_parent, parents,

3392

(index_memo, compression_parent, parent_keys,

3432

3393

record_details) = details

3433

self._revision_id_graph[key] = parents

3394

self._parent_map[key] = parent_keys

3395

self._heads_provider = None

3434

3396

records.append((key, index_memo))

3435

3397

# Do we actually need to check _annotated_lines?

3436

pending.update(p for p in parents

3437

if p not in self._all_build_details)

3398

pending.update([p for p in parent_keys

3399

if p not in self._all_build_details])

3400

if parent_keys:

3401

for parent_key in parent_keys:

3402

if parent_key in self._num_needed_children:

3403

self._num_needed_children[parent_key] += 1

3404

else:

3405

self._num_needed_children[parent_key] = 1

3438

3406

if compression_parent:

3439

self._compression_children.setdefault(compression_parent,

3440

[]).append(key)

3441

if parents:

3442

for parent in parents:

3443

self._annotate_children.setdefault(parent,

3444

[]).append(key)

3445

num_gens = generation - kept_generation

3446

if ((num_gens >= self._generations_until_keep)

3447

and len(parents) > 1):

3448

kept_generation = generation

3449

self._nodes_to_keep_annotations.add(key)

3407

if compression_parent in self._num_compression_children:

3408

self._num_compression_children[compression_parent] += 1

3409

else:

3410

self._num_compression_children[compression_parent] = 1

3450

3411

3451

3412

missing_versions = this_iteration.difference(build_details.keys())

3452

self._ghosts.update(missing_versions)

3453

for missing_version in missing_versions:

3454

# add a key, no parents

3455

self._revision_id_graph[missing_version] = ()

3456

pending.discard(missing_version) # don't look for it

3457

if self._ghosts.intersection(self._compression_children):

3458

raise KnitCorrupt(

3459

"We cannot have nodes which have a ghost compression parent:\n"

3460

"ghosts: %r\n"

3461

"compression children: %r"

3462

% (self._ghosts, self._compression_children))

3463

# Cleanout anything that depends on a ghost so that we don't wait for

3464

# the ghost to show up

3465

for node in self._ghosts:

3466

if node in self._annotate_children:

3467

# We won't be building this node

3468

del self._annotate_children[node]

3413

if missing_versions:

3414

for key in missing_versions:

3415

if key in self._parent_map and key in self._text_cache:

3416

# We already have this text ready, we just need to

3417

# yield it later so we get it annotated

3418

ann_keys.add(key)

3419

parent_keys = self._parent_map[key]

3420

for parent_key in parent_keys:

3421

if parent_key in self._num_needed_children:

3422

self._num_needed_children[parent_key] += 1

3423

else:

3424

self._num_needed_children[parent_key] = 1

3425

pending.update([p for p in parent_keys

3426

if p not in self._all_build_details])

3427

else:

3428

raise errors.RevisionNotPresent(key, self._vf)

3469

3429

# Generally we will want to read the records in reverse order, because

3470

3430

# we find the parent nodes after the children

3471

3431

records.reverse()

3472

return records

3473

3474

def _annotate_records(self, records):

3475

"""Build the annotations for the listed records."""

3432

return records, ann_keys

3433

3434

def _get_needed_texts(self, key, pb=None):

3435

# if True or len(self._vf._fallback_vfs) > 0:

3436

if len(self._vf._fallback_vfs) > 0:

3437

# If we have fallbacks, go to the generic path

3438

for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):

3439

yield v

3440

return

3441

while True:

3442

try:

3443

records, ann_keys = self._get_build_graph(key)

3444

for idx, (sub_key, text, num_lines) in enumerate(

3445

self._extract_texts(records)):

3446

if pb is not None:

3447

pb.update('annotating', idx, len(records))

3448

yield sub_key, text, num_lines

3449

for sub_key in ann_keys:

3450

text = self._text_cache[sub_key]

3451

num_lines = len(text) # bad assumption

3452

yield sub_key, text, num_lines

3453

return

3454

except errors.RetryWithNewPacks, e:

3455

self._vf._access.reload_or_raise(e)

3456

# The cached build_details are no longer valid

3457

self._all_build_details.clear()

3458

3459

def _cache_delta_blocks(self, key, compression_parent, delta, lines):

3460

parent_lines = self._text_cache[compression_parent]

3461

blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines))

3462

self._matching_blocks[(key, compression_parent)] = blocks

3463

3464

def _expand_record(self, key, parent_keys, compression_parent, record,

3465

record_details):

3466

delta = None

3467

if compression_parent:

3468

if compression_parent not in self._content_objects:

3469

# Waiting for the parent

3470

self._pending_deltas.setdefault(compression_parent, []).append(

3471

(key, parent_keys, record, record_details))

3472

return None

3473

# We have the basis parent, so expand the delta

3474

num = self._num_compression_children[compression_parent]

3475

num -= 1

3476

if num == 0:

3477

base_content = self._content_objects.pop(compression_parent)

3478

self._num_compression_children.pop(compression_parent)

3479

else:

3480

self._num_compression_children[compression_parent] = num

3481

base_content = self._content_objects[compression_parent]

3482

# It is tempting to want to copy_base_content=False for the last

3483

# child object. However, whenever noeol=False,

3484

# self._text_cache[parent_key] is content._lines. So mutating it

3485

# gives very bad results.

3486

# The alternative is to copy the lines into text cache, but then we

3487

# are copying anyway, so just do it here.

3488

content, delta = self._vf._factory.parse_record(

3489

key, record, record_details, base_content,

3490

copy_base_content=True)

3491

else:

3492

# Fulltext record

3493

content, _ = self._vf._factory.parse_record(

3494

key, record, record_details, None)

3495

if self._num_compression_children.get(key, 0) > 0:

3496

self._content_objects[key] = content

3497

lines = content.text()

3498

self._text_cache[key] = lines

3499

if delta is not None:

3500

self._cache_delta_blocks(key, compression_parent, delta, lines)

3501

return lines

3502

3503

def _get_parent_annotations_and_matches(self, key, text, parent_key):

3504

"""Get the list of annotations for the parent, and the matching lines.

3505

3506

:param text: The opaque value given by _get_needed_texts

3507

:param parent_key: The key for the parent text

3508

:return: (parent_annotations, matching_blocks)

3509

parent_annotations is a list as long as the number of lines in

3510

parent

3511

matching_blocks is a list of (parent_idx, text_idx, len) tuples

3512

indicating which lines match between the two texts

3513

"""

3514

block_key = (key, parent_key)

3515

if block_key in self._matching_blocks:

3516

blocks = self._matching_blocks.pop(block_key)

3517

parent_annotations = self._annotations_cache[parent_key]

3518

return parent_annotations, blocks

3519

return annotate.Annotator._get_parent_annotations_and_matches(self,

3520

key, text, parent_key)

3521

3522

def _process_pending(self, key):

3523

"""The content for 'key' was just processed.

3524

3525

Determine if there is any more pending work to be processed.

3526

"""

3527

to_return = []

3528

if key in self._pending_deltas:

3529

compression_parent = key

3530

children = self._pending_deltas.pop(key)

3531

for child_key, parent_keys, record, record_details in children:

3532

lines = self._expand_record(child_key, parent_keys,

3533

compression_parent,

3534

record, record_details)

3535

if self._check_ready_for_annotations(child_key, parent_keys):

3536

to_return.append(child_key)

3537

# Also check any children that are waiting for this parent to be

3538

# annotation ready

3539

if key in self._pending_annotation:

3540

children = self._pending_annotation.pop(key)

3541

to_return.extend([c for c, p_keys in children

3542

if self._check_ready_for_annotations(c, p_keys)])

3543

return to_return

3544

3545

def _check_ready_for_annotations(self, key, parent_keys):

3546

"""return true if this text is ready to be yielded.

3547

3548

Otherwise, this will return False, and queue the text into

3549

self._pending_annotation

3550

"""

3551

for parent_key in parent_keys:

3552

if parent_key not in self._annotations_cache:

3553

# still waiting on at least one parent text, so queue it up

3554

# Note that if there are multiple parents, we need to wait

3555

# for all of them.

3556

self._pending_annotation.setdefault(parent_key,

3557

[]).append((key, parent_keys))

3558

return False

3559

return True

3560

3561

def _extract_texts(self, records):

3562

"""Extract the various texts needed based on records"""

3476

3563

# We iterate in the order read, rather than a strict order requested

3477

3564

# However, process what we can, and put off to the side things that

3478

3565

# still need parents, cleaning them up when those parents are

3479

3566

# processed.

3480

for (rev_id, record,

3481

digest) in self._knit._read_records_iter(records):

3482

if rev_id in self._annotated_lines:

3567

# Basic data flow:

3568

# 1) As 'records' are read, see if we can expand these records into

3569

# Content objects (and thus lines)

3570

# 2) If a given line-delta is waiting on its compression parent, it

3571

# gets queued up into self._pending_deltas, otherwise we expand

3572

# it, and put it into self._text_cache and self._content_objects

3573

# 3) If we expanded the text, we will then check to see if all

3574

# parents have also been processed. If so, this text gets yielded,

3575

# else this record gets set aside into pending_annotation

3576

# 4) Further, if we expanded the text in (2), we will then check to

3577

# see if there are any children in self._pending_deltas waiting to

3578

# also be processed. If so, we go back to (2) for those

3579

# 5) Further again, if we yielded the text, we can then check if that

3580

# 'unlocks' any of the texts in pending_annotations, which should

3581

# then get yielded as well

3582

# Note that both steps 4 and 5 are 'recursive' in that unlocking one

3583

# compression child could unlock yet another, and yielding a fulltext

3584

# will also 'unlock' the children that are waiting on that annotation.

3585

# (Though also, unlocking 1 parent's fulltext, does not unlock a child

3586

# if other parents are also waiting.)

3587

# We want to yield content before expanding child content objects, so

3588

# that we know when we can re-use the content lines, and the annotation

3589

# code can know when it can stop caching fulltexts, as well.

3590

3591

# Children that are missing their compression parent

3592

pending_deltas = {}

3593

for (key, record, digest) in self._vf._read_records_iter(records):

3594

# ghosts?

3595

details = self._all_build_details[key]

3596

(_, compression_parent, parent_keys, record_details) = details

3597

lines = self._expand_record(key, parent_keys, compression_parent,

3598

record, record_details)

3599

if lines is None:

3600

# Pending delta should be queued up

3483

3601

continue

3484

parent_ids = self._revision_id_graph[rev_id]

3485

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3486

details = self._all_build_details[rev_id]

3487

(index_memo, compression_parent, parents,

3488

record_details) = details

3489

nodes_to_annotate = []

3490

# TODO: Remove the punning between compression parents, and

3491

# parent_ids, we should be able to do this without assuming

3492

# the build order

3493

if len(parent_ids) == 0:

3494

# There are no parents for this node, so just add it

3495

# TODO: This probably needs to be decoupled

3496

fulltext_content, delta = self._knit._factory.parse_record(

3497

rev_id, record, record_details, None)

3498

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3499

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3500

parent_ids, left_matching_blocks=None))

3501

else:

3502

child = (rev_id, parent_ids, record)

3503

# Check if all the parents are present

3504

self._check_parents(child, nodes_to_annotate)

3505

while nodes_to_annotate:

3506

# Should we use a queue here instead of a stack?

3507

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3508

(index_memo, compression_parent, parents,

3509

record_details) = self._all_build_details[rev_id]

3510

blocks = None

3511

if compression_parent is not None:

3512

comp_children = self._compression_children[compression_parent]

3513

if rev_id not in comp_children:

3514

raise AssertionError("%r not in compression children %r"

3515

% (rev_id, comp_children))

3516

# If there is only 1 child, it is safe to reuse this

3517

# content

3518

reuse_content = (len(comp_children) == 1

3519

and compression_parent not in

3520

self._nodes_to_keep_annotations)

3521

if reuse_content:

3522

# Remove it from the cache since it will be changing

3523

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3524

# Make sure to copy the fulltext since it might be

3525

# modified

3526

parent_fulltext = list(parent_fulltext_content.text())

3527

else:

3528

parent_fulltext_content = self._fulltext_contents[compression_parent]

3529

parent_fulltext = parent_fulltext_content.text()

3530

comp_children.remove(rev_id)

3531

fulltext_content, delta = self._knit._factory.parse_record(

3532

rev_id, record, record_details,

3533

parent_fulltext_content,

3534

copy_base_content=(not reuse_content))

3535

fulltext = self._add_fulltext_content(rev_id,

3536

fulltext_content)

3537

if compression_parent == parent_ids[0]:

3538

# the compression_parent is the left parent, so we can

3539

# re-use the delta

3540

blocks = KnitContent.get_line_delta_blocks(delta,

3541

parent_fulltext, fulltext)

3542

else:

3543

fulltext_content = self._knit._factory.parse_fulltext(

3544

record, rev_id)

3545

fulltext = self._add_fulltext_content(rev_id,

3546

fulltext_content)

3547

nodes_to_annotate.extend(

3548

self._add_annotation(rev_id, fulltext, parent_ids,

3549

left_matching_blocks=blocks))

3550

3551

def _get_heads_provider(self):

3552

"""Create a heads provider for resolving ancestry issues."""

3553

if self._heads_provider is not None:

3554

return self._heads_provider

3555

self._heads_provider = _mod_graph.KnownGraph(self._revision_id_graph)

3556

return self._heads_provider

3557

3558

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    :param key: The key to annotate.
    :return: self._annotated_lines[key] once the records for key have
        been annotated, or the result of self._simple_annotate(key) when
        the knit has fallback versioned files.
    :raises errors.RevisionNotPresent: if key is found in self._ghosts
        after the build graph has been computed.
    """
    if len(self._knit._fallback_vfs) > 0:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    while True:
        try:
            records = self._get_build_graph(key)
            if key in self._ghosts:
                raise errors.RevisionNotPresent(key, self._knit)
            self._annotate_records(records)
            return self._annotated_lines[key]
        except errors.RetryWithNewPacks as e:
            # If reload_or_raise returns rather than raising, go round
            # the loop again and retry from the build graph.
            self._knit._access.reload_or_raise(e)
            # The cached build_details are no longer valid
            self._all_build_details.clear()

3577

3578

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The list built from annotate.reannotate() for key.
    :raises errors.RevisionNotPresent: if the ancestry of key contains no
        present entries, or if no annotation was produced for key.
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method
    # -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
                      if v is not None)
    if not parent_map:
        raise errors.RevisionNotPresent(key, self._knit)
    keys = parent_map.keys()
    heads_provider = _mod_graph.KnownGraph(parent_map)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Keep 'key' bound to the requested key; the record's own key gets
        # a separate name so the final lookup below is for the right text.
        record_key = record.key
        fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        # parent_map only holds entries whose parents are not None, so no
        # None check is needed here.
        parent_lines = [parent_cache[parent]
                        for parent in parent_map[record_key]]
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None,
                       heads_provider))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

3610

3602

# At this point, we may be able to yield this content, if all

3603

# parents are also finished

3604

yield_this_text = self._check_ready_for_annotations(key,

3605

parent_keys)

3606

if yield_this_text:

3607

# All parents present

3608

yield key, lines, len(lines)

3609

to_process = self._process_pending(key)

3610

while to_process:

3611

this_process = to_process

3612

to_process = []

3613

for key in this_process:

3614

lines = self._text_cache[key]

3615

yield key, lines, len(lines)

3616

to_process.extend(self._process_pending(key))

3611

3617

3612

3618

try:

3613

3619

from bzrlib._knit_load_data_c import _load_data_c as _load_data

Older »