bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
1  | 
# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
2  | 
#
 | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""Knit versionedfile implementation.
 | 
|
18  | 
||
19  | 
A knit is a versioned file implementation that supports efficient append only
 | 
|
20  | 
updates.
 | 
|
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
21  | 
|
22  | 
Knit file layout:
 | 
|
23  | 
lifeless: the data file is made up of "delta records".  each delta record has a delta header 
 | 
|
24  | 
that contains; (1) a version id, (2) the size of the delta (in lines), and (3)  the digest of 
 | 
|
25  | 
the -expanded data- (ie, the delta applied to the parent).  the delta also ends with a 
 | 
|
26  | 
end-marker; simply "end VERSION"
 | 
|
27  | 
||
28  | 
delta can be line or full contents.
 | 
|
29  | 
... the 8's there are the index number of the annotation.
 | 
|
30  | 
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
 | 
|
31  | 
59,59,3
 | 
|
32  | 
8
 | 
|
33  | 
8         if ie.executable:
 | 
|
34  | 
8             e.set('executable', 'yes')
 | 
|
35  | 
130,130,2
 | 
|
36  | 
8         if elt.get('executable') == 'yes':
 | 
|
37  | 
8             ie.executable = True
 | 
|
38  | 
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 
 | 
|
39  | 
||
40  | 
||
41  | 
whats in an index:
 | 
|
42  | 
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
 | 
|
43  | 
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
 | 
|
44  | 
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
 | 
|
45  | 
09:33 < lifeless> right
 | 
|
46  | 
09:33 < jrydberg> lifeless: the position and size is the range in the data file
 | 
|
47  | 
||
48  | 
||
49  | 
so the index sequence is the dictionary compressed sequence number used
 | 
|
50  | 
in the deltas to provide line annotation
 | 
|
51  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
52  | 
"""
 | 
53  | 
||
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
54  | 
# TODOS:
 | 
55  | 
# 10:16 < lifeless> make partial index writes safe
 | 
|
56  | 
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
 | 
|
57  | 
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave 
 | 
|
58  | 
#                    always' approach.
 | 
|
| 
1563.2.11
by Robert Collins
 Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis.  | 
59  | 
# move sha1 out of the content so that join is faster at verifying parents
 | 
60  | 
# record content length ?
 | 
|
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
61  | 
|
62  | 
||
| 
1563.2.11
by Robert Collins
 Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis.  | 
63  | 
from cStringIO import StringIO  | 
| 
1596.2.28
by Robert Collins
 more knit profile based tuning.  | 
64  | 
from itertools import izip, chain  | 
| 
1756.2.17
by Aaron Bentley
 Fixes suggested by John Meinel  | 
65  | 
import operator  | 
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
66  | 
import os  | 
| 
3789.2.1
by John Arbash Meinel
 _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened.  | 
67  | 
import sys  | 
| 
1594.2.19
by Robert Collins
 More coalescing tweaks, and knit feedback.  | 
68  | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
69  | 
from bzrlib.lazy_import import lazy_import  | 
70  | 
lazy_import(globals(), """  | 
|
71  | 
from bzrlib import (
 | 
|
| 
2770.1.1
by Aaron Bentley
 Initial implmentation of plain knit annotation  | 
72  | 
    annotate,
 | 
| 
3535.5.1
by John Arbash Meinel
 cleanup a few imports to be lazily loaded.  | 
73  | 
    debug,
 | 
74  | 
    diff,
 | 
|
| 
3224.1.10
by John Arbash Meinel
 Introduce the heads_provider for reannotate.  | 
75  | 
    graph as _mod_graph,
 | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
76  | 
    index as _mod_index,
 | 
| 
2998.2.2
by John Arbash Meinel
 implement a faster path for copying from packs back to knits.  | 
77  | 
    lru_cache,
 | 
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
78  | 
    pack,
 | 
| 
3535.5.1
by John Arbash Meinel
 cleanup a few imports to be lazily loaded.  | 
79  | 
    progress,
 | 
| 
2745.1.2
by Robert Collins
 Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly.  | 
80  | 
    trace,
 | 
| 
3224.5.1
by Andrew Bennetts
 Lots of assorted hackery to reduce the number of imports for common operations. Improves 'rocks', 'st' and 'help' times by ~50ms on my laptop.  | 
81  | 
    tsort,
 | 
| 
3535.5.1
by John Arbash Meinel
 cleanup a few imports to be lazily loaded.  | 
82  | 
    tuned_gzip,
 | 
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
83  | 
    )
 | 
84  | 
""")  | 
|
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
85  | 
from bzrlib import (  | 
86  | 
errors,  | 
|
| 
2249.5.12
by John Arbash Meinel
 Change the APIs for VersionedFile, Store, and some of Repository into utf-8  | 
87  | 
osutils,  | 
| 
2104.4.2
by John Arbash Meinel
 Small cleanup and NEWS entry about fixing bug #65714  | 
88  | 
patiencediff,  | 
| 
2158.3.1
by Dmitry Vasiliev
 KnitIndex tests/fixes/optimizations  | 
89  | 
    )
 | 
90  | 
from bzrlib.errors import (  | 
|
91  | 
FileExists,  | 
|
92  | 
NoSuchFile,  | 
|
93  | 
KnitError,  | 
|
94  | 
InvalidRevisionId,  | 
|
95  | 
KnitCorrupt,  | 
|
96  | 
KnitHeaderError,  | 
|
97  | 
RevisionNotPresent,  | 
|
98  | 
RevisionAlreadyPresent,  | 
|
| 
3787.1.1
by Robert Collins
 Embed the failed text in sha1 knit errors.  | 
99  | 
SHA1KnitCorrupt,  | 
| 
2158.3.1
by Dmitry Vasiliev
 KnitIndex tests/fixes/optimizations  | 
100  | 
    )
 | 
101  | 
from bzrlib.osutils import (  | 
|
102  | 
contains_whitespace,  | 
|
103  | 
contains_linebreaks,  | 
|
| 
2850.1.1
by Robert Collins
 * ``KnitVersionedFile.add*`` will no longer cache added records even when  | 
104  | 
sha_string,  | 
| 
2158.3.1
by Dmitry Vasiliev
 KnitIndex tests/fixes/optimizations  | 
105  | 
sha_strings,  | 
| 
3350.3.8
by Robert Collins
 Basic stream insertion, no fast path yet for knit to knit.  | 
106  | 
split_lines,  | 
| 
2158.3.1
by Dmitry Vasiliev
 KnitIndex tests/fixes/optimizations  | 
107  | 
    )
 | 
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
108  | 
from bzrlib.versionedfile import (  | 
| 
3350.3.12
by Robert Collins
 Generate streams with absent records.  | 
109  | 
AbsentContentFactory,  | 
| 
3350.3.8
by Robert Collins
 Basic stream insertion, no fast path yet for knit to knit.  | 
110  | 
adapter_registry,  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
111  | 
ConstantMapper,  | 
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
112  | 
ContentFactory,  | 
| 
3890.2.1
by John Arbash Meinel
 Start working on a ChunkedContentFactory.  | 
113  | 
ChunkedContentFactory,  | 
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
114  | 
VersionedFile,  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
115  | 
VersionedFiles,  | 
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
116  | 
    )
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
117  | 
|
118  | 
||
119  | 
# TODO: Split out code specific to this format into an associated object.
 | 
|
120  | 
||
121  | 
# TODO: Can we put in some kind of value to check that the index and data
 | 
|
122  | 
# files belong together?
 | 
|
123  | 
||
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
124  | 
# TODO: accommodate binaries, perhaps by storing a byte count
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
125  | 
|
126  | 
# TODO: function to check whole file
 | 
|
127  | 
||
128  | 
# TODO: atomically append data, then measure backwards from the cursor
 | 
|
129  | 
# position after writing to work out where it was located.  we may need to
 | 
|
130  | 
# bypass python file buffering.
 | 
|
131  | 
||
132  | 
DATA_SUFFIX = '.knit'  | 
|
133  | 
INDEX_SUFFIX = '.kndx'  | 
|
134  | 
||
135  | 
||
| 
3350.3.4
by Robert Collins
 Finish adapters for annotated knits to unannotated knits and full texts.  | 
136  | 
class KnitAdapter(object):
    """Base class for knit record adaption."""

    def __init__(self, basis_vf):
        """Create an adapter which accesses full texts from basis_vf.

        :param basis_vf: A versioned file to access basis texts of deltas from.
            May be None for adapters that do not need to access basis texts.
        """
        # A KnitVersionedFiles constructed with no index or access; used by
        # subclasses purely for its record (de)serialisation helpers
        # (_parse_record_unchecked / _record_to_data).
        self._data = KnitVersionedFiles(None, None)
        # Factories for the annotated and plain knit record formats.
        self._annotate_factory = KnitAnnotateFactory()
        self._plain_factory = KnitPlainFactory()
        self._basis_vf = basis_vf
|
150  | 
||
151  | 
class FTAnnotatedToUnannotated(KnitAdapter):
    """An adapter from FT annotated knits to unannotated ones."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        # Decompress and parse the raw annotated fulltext record.
        header, record_contents = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        full_text = self._annotate_factory.parse_fulltext(
            record_contents, header[1])
        # Re-serialise the text without the per-line annotations.
        size, bytes = self._data._record_to_data(
            (header[1],), header[3], full_text.text())
        return bytes
160  | 
||
161  | 
||
162  | 
class DeltaAnnotatedToUnannotated(KnitAdapter):
    """An adapter for deltas from annotated to unannotated."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        # Parse the raw annotated delta record, drop its annotations, and
        # serialise the resulting plain delta back out.
        header, record_contents = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        line_delta = self._annotate_factory.parse_line_delta(
            record_contents, header[1], plain=True)
        plain_lines = self._plain_factory.lower_line_delta(line_delta)
        size, bytes = self._data._record_to_data(
            (header[1],), header[3], plain_lines)
        return bytes
173  | 
||
174  | 
||
175  | 
class FTAnnotatedToFullText(KnitAdapter):
    """An adapter from FT annotated knits to full texts."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        # Parse the annotated fulltext record and return the plain text.
        content, delta = self._annotate_factory.parse_record(factory.key[-1],
            contents, factory._build_details, None)
        return ''.join(content.text())
|
184  | 
||
185  | 
||
186  | 
class DeltaAnnotatedToFullText(KnitAdapter):
    """An adapter from annotated deltas to full texts."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        delta = self._annotate_factory.parse_line_delta(contents, rec[1],
            plain=True)
        # The delta is against its compression parent; fetch that basis
        # text from the basis versioned file.
        compression_parent = factory.parents[0]
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        # Manually apply the delta because we have one annotated content and
        # one plain.
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        basis_content.apply_delta(delta, rec[1])
        basis_content._should_strip_eol = factory._build_details[1]
        return ''.join(basis_content.text())
|
207  | 
||
208  | 
||
| 
3350.3.5
by Robert Collins
 Create adapters from plain compressed knit content.  | 
209  | 
class FTPlainToFullText(KnitAdapter):
    """An adapter from FT plain knits to full texts."""

    def get_bytes(self, factory, compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        # Parse the plain fulltext record and return the text.
        content, delta = self._plain_factory.parse_record(factory.key[-1],
            contents, factory._build_details, None)
        return ''.join(content.text())
|
218  | 
||
219  | 
||
220  | 
class DeltaPlainToFullText(KnitAdapter):
    """An adapter from plain deltas to full texts."""

    def get_bytes(self, factory, compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        delta = self._plain_factory.parse_line_delta(contents, rec[1])
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # Rebuild the full text by parsing the delta record against the
        # basis content.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())
|
241  | 
||
242  | 
||
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
243  | 
class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # Derive the storage_kind name: delta vs fulltext record,
        # annotated vs plain format.
        if build_details[0] == 'line-delta':
            record_kind = 'delta'
        else:
            record_kind = 'ft'
        if annotated:
            format_prefix = 'annotated-'
        else:
            format_prefix = ''
        self.storage_kind = 'knit-%s%s-gz' % (format_prefix, record_kind)
        self._raw_record = raw_record
        self._build_details = build_details
        self._knit = knit

    def get_bytes_as(self, storage_kind):
        # The raw record can be handed straight back when the requested
        # representation is the one we already hold.
        if storage_kind == self.storage_kind:
            return self._raw_record
        if self._knit is not None:
            # Otherwise extract via the backing knit, when one was supplied.
            if storage_kind == 'chunked':
                return self._knit.get_lines(self.key[0])
            if storage_kind == 'fulltext':
                return self._knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
|
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
288  | 
|
289  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
290  | 
class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The content is always held in memory as a list of lines each ending in
    a newline, plus a flag recording whether the final newline is really
    present or not, because that corresponds to the on-disk knit
    representation.
    """

    def __init__(self):
        # When True, text() drops the trailing newline from the last line.
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines."""
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag != 'equal':
                # ofrom, oto, length, data
                yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
        target_len = len(target)
        source_pos = 0
        target_pos = 0
        for start, end, count, _ in knit_delta:
            matched = start - source_pos
            block_len = matched
            if block_len > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[source_pos + block_len - 1] != \
                        target[target_pos + block_len - 1]:
                    block_len -= 1
                if block_len > 0:
                    yield source_pos, target_pos, block_len
            target_pos += count + matched
            source_pos = end
        # Trailing run after the final delta hunk, with the same unreliable
        # last-line caveat as above.
        block_len = target_len - target_pos
        if block_len > 0:
            if source[source_pos + block_len - 1] != \
                    target[target_pos + block_len - 1]:
                block_len -= 1
            if block_len > 0:
                yield source_pos, target_pos, block_len
        yield source_pos + (target_len - target_pos), target_len, 0
344  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
345  | 
|
| 
2794.1.2
by Robert Collins
 Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts.  | 
346  | 
class AnnotatedKnitContent(KnitContent):  | 
347  | 
"""Annotated content."""  | 
|
348  | 
||
349  | 
def __init__(self, lines):  | 
|
| 
3224.1.15
by John Arbash Meinel
 Finish removing method and noeol from general knowledge,  | 
350  | 
KnitContent.__init__(self)  | 
| 
2794.1.2
by Robert Collins
 Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts.  | 
351  | 
self._lines = lines  | 
352  | 
||
| 
3316.2.13
by Robert Collins
 * ``VersionedFile.annotate_iter`` is deprecated. While in principal this  | 
353  | 
def annotate(self):  | 
354  | 
"""Return a list of (origin, text) for each content line."""  | 
|
| 
3468.2.4
by Martin Pool
 Test and fix #234748 problems in trailing newline diffs  | 
355  | 
lines = self._lines[:]  | 
356  | 
if self._should_strip_eol:  | 
|
357  | 
origin, last_line = lines[-1]  | 
|
358  | 
lines[-1] = (origin, last_line.rstrip('\n'))  | 
|
359  | 
return lines  | 
|
| 
2794.1.2
by Robert Collins
 Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts.  | 
360  | 
|
| 
2921.2.1
by Robert Collins
 * Knit text reconstruction now avoids making copies of the lines list for  | 
361  | 
def apply_delta(self, delta, new_version_id):  | 
| 
2921.2.2
by Robert Collins
 Review feedback.  | 
362  | 
"""Apply delta to this object to become new_version_id."""  | 
| 
2921.2.1
by Robert Collins
 * Knit text reconstruction now avoids making copies of the lines list for  | 
363  | 
offset = 0  | 
364  | 
lines = self._lines  | 
|
365  | 
for start, end, count, delta_lines in delta:  | 
|
366  | 
lines[offset+start:offset+end] = delta_lines  | 
|
367  | 
offset = offset + (start - end) + count  | 
|
368  | 
||
| 
2794.1.2
by Robert Collins
 Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts.  | 
369  | 
def text(self):  | 
| 
2911.1.1
by Martin Pool
 Better messages when problems are detected inside a knit  | 
370  | 
try:  | 
| 
3224.1.22
by John Arbash Meinel
 Cleanup the extra debugging info, and some >80 char lines.  | 
371  | 
lines = [text for origin, text in self._lines]  | 
| 
2911.1.1
by Martin Pool
 Better messages when problems are detected inside a knit  | 
372  | 
except ValueError, e:  | 
373  | 
            # most commonly (only?) caused by the internal form of the knit
 | 
|
374  | 
            # missing annotation information because of a bug - see thread
 | 
|
375  | 
            # around 20071015
 | 
|
376  | 
raise KnitCorrupt(self,  | 
|
377  | 
"line in annotated knit missing annotation information: %s"  | 
|
378  | 
% (e,))  | 
|
| 
3224.1.15
by John Arbash Meinel
 Finish removing method and noeol from general knowledge,  | 
379  | 
if self._should_strip_eol:  | 
| 
3350.3.4
by Robert Collins
 Finish adapters for annotated knits to unannotated knits and full texts.  | 
380  | 
lines[-1] = lines[-1].rstrip('\n')  | 
| 
3224.1.15
by John Arbash Meinel
 Finish removing method and noeol from general knowledge,  | 
381  | 
return lines  | 
382  | 
||
| 
2794.1.2
by Robert Collins
 Nuke versioned file add/get delta support, allowing easy simplification of unannotated Content, reducing memory copies and friction during commit on unannotated texts.  | 
383  | 
def copy(self):  | 
384  | 
return AnnotatedKnitContent(self._lines[:])  | 
|
385  | 
||
386  | 
||
387  | 
class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is reported
    for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
    objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        version = self._version_id
        return [(version, text) for text in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        shift = 0
        working = self._lines
        for start, end, count, delta_lines in delta:
            working[shift + start:shift + end] = delta_lines
            shift += (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        result = self._lines
        if self._should_strip_eol:
            # Copy before stripping so the stored lines stay untouched.
            result = result[:]
            result[-1] = result[-1].rstrip('\n')
        return result
|
422  | 
||
423  | 
||
424  | 
class _KnitFactory(object):  | 
|
425  | 
"""Base class for common Factory functions."""  | 
|
426  | 
||
427  | 
def parse_record(self, version_id, record, record_details,  | 
|
428  | 
base_content, copy_base_content=True):  | 
|
429  | 
"""Parse a record into a full content object.  | 
|
430  | 
||
431  | 
        :param version_id: The official version id for this content
 | 
|
432  | 
        :param record: The data returned by read_records_iter()
 | 
|
433  | 
        :param record_details: Details about the record returned by
 | 
|
434  | 
            get_build_details
 | 
|
435  | 
        :param base_content: If get_build_details returns a compression_parent,
 | 
|
436  | 
            you must return a base_content here, else use None
 | 
|
437  | 
        :param copy_base_content: When building from the base_content, decide
 | 
|
438  | 
            you can either copy it and return a new object, or modify it in
 | 
|
439  | 
            place.
 | 
|
440  | 
        :return: (content, delta) A Content object and possibly a line-delta,
 | 
|
441  | 
            delta may be None
 | 
|
442  | 
        """
 | 
|
443  | 
method, noeol = record_details  | 
|
444  | 
if method == 'line-delta':  | 
|
445  | 
if copy_base_content:  | 
|
446  | 
content = base_content.copy()  | 
|
447  | 
else:  | 
|
448  | 
content = base_content  | 
|
449  | 
delta = self.parse_line_delta(record, version_id)  | 
|
450  | 
content.apply_delta(delta, version_id)  | 
|
451  | 
else:  | 
|
452  | 
content = self.parse_fulltext(record, version_id)  | 
|
453  | 
delta = None  | 
|
454  | 
content._should_strip_eol = noeol  | 
|
455  | 
return (content, delta)  | 
|
456  | 
||
457  | 
||
458  | 
class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Wrap lines in an AnnotatedKnitContent, attributing each line to
        version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Iterate over the hunks of a serialised line delta.

        :param version_id: Accepted for symmetry with
            KnitPlainFactory.parse_line_delta_iter and forwarded to
            parse_line_delta, which does not use it for annotated deltas;
            callers may therefore omit it.
        """
        # Bug fix: parse_line_delta takes a mandatory version_id parameter,
        # so the previous call without one raised TypeError.
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content) tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        next = lines.next
        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next().split(' ', 1)[1] for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the annotated lines for key, rebuilding full keys.

        Serialised annotations store only the last key element, so when key
        is a tuple each stored origin is re-prefixed with key[:-1].
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key suffixes
        # for this factory.
        if type(key) == tuple:
            prefix = key[:-1]
            origins = content.annotate()
            result = []
            for origin, line in origins:
                result.append((prefix + (origin,), line))
            return result
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple here?
            # Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()
| 
2770.1.1
by Aaron Bentley
 Initial implmentation of plain knit annotation  | 
580  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
581  | 
|
| 
3224.1.15
by John Arbash Meinel
 Finish removing method and noeol from general knowledge,  | 
582  | 
class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent owned by version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, lines) hunks from a serialised delta."""
        pos = 0
        total = len(lines)
        while pos < total:
            header = lines[pos]
            pos += 1
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, lines[pos:pos + c]
            pos += c

    def parse_line_delta(self, lines, version_id):
        """Return the delta hunks as a materialised list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        next = line_iter.next
        for header in line_iter:
            count = int(header.split(',')[2])
            for i in xrange(count):
                yield next()

    def lower_fulltext(self, content):
        """Serialise a fulltext content record; plain text needs no markup."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta hunks into header lines followed by their lines."""
        serialised = []
        for start, end, c, lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, c))
            serialised.extend(lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by reconstructing history with a _KnitAnnotator."""
        return _KnitAnnotator(knit).annotate(key)
642  | 
||
643  | 
||
644  | 
||
645  | 
def make_file_factory(annotated, mapper):
    """Return a callable that builds a file based KnitVersionedFiles.

    The result is only functional enough to run interface tests; it does not
    try to provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    """
    def factory(transport):
        index = _KndxIndex(transport, mapper, lambda: None,
            lambda: True, lambda: True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(index, key_access, annotated=annotated)
    return factory
|
659  | 
||
660  | 
||
661  | 
def make_pack_factory(graph, delta, keylength):
    """Return a callable that builds a pack based VersionedFiles.

    The result is only functional enough to run interface tests; it does not
    try to provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list per enabled feature: graph parents, then deltas.
        ref_length = 0
        if graph:
            ref_length += 1
        if delta:
            ref_length += 1
        # Cap delta chains to keep extraction cheap; 0 disables deltas.
        max_delta_chain = 200 if delta else 0
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _KnitGraphIndex(graph_index, lambda: True, parents=parents,
            deltas=delta, add_callback=graph_index.add_nodes)
        access = _DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        result = KnitVersionedFiles(index, access,
            max_delta_chain=max_delta_chain)
        # Expose the raw stream and writer so cleanup_pack_knit can finish
        # the pack container.
        result.stream = stream
        result.writer = writer
        return result
    return factory
|
696  | 
||
697  | 
||
698  | 
def cleanup_pack_knit(versioned_files):
    """Close the write stream and finish the pack writer of versioned_files.

    Counterpart to make_pack_factory: the stream is closed first, then the
    container writer is ended.
    """
    vf = versioned_files
    vf.stream.close()
    vf.writer.end()
|
701  | 
||
702  | 
||
703  | 
class KnitVersionedFiles(VersionedFiles):  | 
|
704  | 
"""Storage for many versioned files using knit compression.  | 
|
705  | 
||
706  | 
    Backend storage is managed by indices and data objects.
 | 
|
| 
3582.1.14
by Martin Pool
 Clearer comments about KnitVersionedFile stacking  | 
707  | 
|
708  | 
    :ivar _index: A _KnitGraphIndex or similar that can describe the 
 | 
|
709  | 
        parents, graph, compression and data location of entries in this 
 | 
|
710  | 
        KnitVersionedFiles.  Note that this is only the index for 
 | 
|
| 
3582.1.16
by Martin Pool
 Review feedback and news entry  | 
711  | 
        *this* vfs; if there are fallbacks they must be queried separately.
 | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
712  | 
    """
 | 
713  | 
||
714  | 
def __init__(self, index, data_access, max_delta_chain=200,  | 
|
| 
3789.2.1
by John Arbash Meinel
 _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened.  | 
715  | 
annotated=False, reload_func=None):  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
716  | 
"""Create a KnitVersionedFiles with index and data_access.  | 
717  | 
||
718  | 
        :param index: The index for the knit data.
 | 
|
719  | 
        :param data_access: The access object to store and retrieve knit
 | 
|
720  | 
            records.
 | 
|
721  | 
        :param max_delta_chain: The maximum number of deltas to permit during
 | 
|
722  | 
            insertion. Set to 0 to prohibit the use of deltas.
 | 
|
723  | 
        :param annotated: Set to True to cause annotations to be calculated and
 | 
|
724  | 
            stored during insertion.
 | 
|
| 
3789.2.1
by John Arbash Meinel
 _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened.  | 
725  | 
        :param reload_func: An function that can be called if we think we need
 | 
726  | 
            to reload the pack listing and try again. See
 | 
|
727  | 
            'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
 | 
|
| 
1563.2.25
by Robert Collins
 Merge in upstream.  | 
728  | 
        """
 | 
| 
3316.2.3
by Robert Collins
 Remove manual notification of transaction finishing on versioned files.  | 
729  | 
self._index = index  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
730  | 
self._access = data_access  | 
731  | 
self._max_delta_chain = max_delta_chain  | 
|
732  | 
if annotated:  | 
|
733  | 
self._factory = KnitAnnotateFactory()  | 
|
734  | 
else:  | 
|
735  | 
self._factory = KnitPlainFactory()  | 
|
| 
3350.8.1
by Robert Collins
 KnitVersionedFiles.add_fallback_versioned_files exists.  | 
736  | 
self._fallback_vfs = []  | 
| 
3789.2.1
by John Arbash Meinel
 _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened.  | 
737  | 
self._reload_func = reload_func  | 
| 
3350.8.1
by Robert Collins
 KnitVersionedFiles.add_fallback_versioned_files exists.  | 
738  | 
|
| 
3702.1.1
by Martin Pool
 Add repr for KnitVersionedFiles  | 
739  | 
def __repr__(self):  | 
740  | 
return "%s(%r, %r)" % (  | 
|
741  | 
self.__class__.__name__,  | 
|
742  | 
self._index,  | 
|
743  | 
self._access)  | 
|
744  | 
||
| 
3350.8.1
by Robert Collins
 KnitVersionedFiles.add_fallback_versioned_files exists.  | 
745  | 
def add_fallback_versioned_files(self, a_versioned_files):  | 
746  | 
"""Add a source of texts for texts not present in this knit.  | 
|
747  | 
||
748  | 
        :param a_versioned_files: A VersionedFiles object.
 | 
|
749  | 
        """
 | 
|
750  | 
self._fallback_vfs.append(a_versioned_files)  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
751  | 
|
752  | 
def add_lines(self, key, parents, lines, parent_texts=None,  | 
|
753  | 
left_matching_blocks=None, nostore_sha=None, random_id=False,  | 
|
754  | 
check_content=True):  | 
|
755  | 
"""See VersionedFiles.add_lines()."""  | 
|
756  | 
self._index._check_write_ok()  | 
|
757  | 
self._check_add(key, lines, random_id, check_content)  | 
|
758  | 
if parents is None:  | 
|
| 
3350.6.11
by Martin Pool
 Review cleanups and documentation from Robert's mail on 2080618  | 
759  | 
            # The caller might pass None if there is no graph data, but kndx
 | 
760  | 
            # indexes can't directly store that, so we give them
 | 
|
761  | 
            # an empty tuple instead.
 | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
762  | 
parents = ()  | 
763  | 
return self._add(key, lines, parents,  | 
|
764  | 
parent_texts, left_matching_blocks, nostore_sha, random_id)  | 
|
765  | 
||
766  | 
    def _add(self, key, lines, parents, parent_texts,
        left_matching_blocks, nostore_sha, random_id):
        """Add a set of lines on top of version specified by parents.

        Any versions not present will be converted into ghosts.

        :param key: Key tuple being added; the last element is used as the
            version id inside the stored hunks.
        :param lines: Content lines; if the final line lacks a trailing
            newline one is added and the 'no-eol' option recorded.
        :param parents: Parent keys; only parents present in this knit's own
            index are candidates for delta compression.
        :param parent_texts: Optional mapping of parent content objects, or
            None (treated as empty).
        :param nostore_sha: If the sha1 of the joined lines equals this,
            raise errors.ExistingContent instead of storing.
        :param random_id: Forwarded to the index when adding the record.
        :return: (digest, text_length, content) for the stored text.
        """
        # first thing, if the content is something we don't need to store, find
        # that out.
        line_bytes = ''.join(lines)
        digest = sha_string(line_bytes)
        if nostore_sha == digest:
            raise errors.ExistingContent

        present_parents = []
        if parent_texts is None:
            parent_texts = {}
        # Do a single query to ascertain parent presence; we only compress
        # against parents in the same kvf.
        present_parent_map = self._index.get_parent_map(parents)
        for parent in parents:
            if parent in present_parent_map:
                present_parents.append(parent)

        # Currently we can only compress against the left most present parent.
        if (len(present_parents) == 0 or
            present_parents[0] != parents[0]):
            delta = False
        else:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spend applying deltas.
            delta = self._check_should_delta(present_parents[0])

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        for element in key:
            if type(element) != str:
                raise TypeError("key contains non-strings: %r" % (key,))
        # Knit hunks are still last-element only
        version_id = key[-1]
        content = self._factory.make(lines, version_id)
        if 'no-eol' in options:
            # Hint to the content object that its text() call should strip the
            # EOL.
            content._should_strip_eol = True
        if delta or (self._factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = self._merge_annotations(content, present_parents,
                parent_texts, delta, self._factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = self._factory.lower_line_delta(delta_hunks)
            size, bytes = self._record_to_data(key, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if self._factory.__class__ == KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, bytes = self._record_to_data(key, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = self._factory.lower_fulltext(content)
                size, bytes = self._record_to_data(key, digest,
                    store_lines)

        # Store the raw bytes first, then record their location in the index.
        access_memo = self._access.add_raw_records([(key, size)], bytes)[0]
        self._index.add_records(
            ((key, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content
|
850  | 
||
851  | 
def annotate(self, key):  | 
|
852  | 
"""See VersionedFiles.annotate."""  | 
|
853  | 
return self._factory.annotate(self, key)  | 
|
854  | 
||
855  | 
def check(self, progress_bar=None):  | 
|
856  | 
"""See VersionedFiles.check()."""  | 
|
857  | 
        # This doesn't actually test extraction of everything, but that will
 | 
|
858  | 
        # impact 'bzr check' substantially, and needs to be integrated with
 | 
|
859  | 
        # care. However, it does check for the obvious problem of a delta with
 | 
|
860  | 
        # no basis.
 | 
|
| 
3517.4.14
by Martin Pool
 KnitVersionedFiles.check should just check its own keys then recurse into fallbacks  | 
861  | 
keys = self._index.keys()  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
862  | 
parent_map = self.get_parent_map(keys)  | 
863  | 
for key in keys:  | 
|
864  | 
if self._index.get_method(key) != 'fulltext':  | 
|
865  | 
compression_parent = parent_map[key][0]  | 
|
866  | 
if compression_parent not in parent_map:  | 
|
867  | 
raise errors.KnitCorrupt(self,  | 
|
868  | 
"Missing basis parent %s for %s" % (  | 
|
869  | 
compression_parent, key))  | 
|
| 
3517.4.14
by Martin Pool
 KnitVersionedFiles.check should just check its own keys then recurse into fallbacks  | 
870  | 
for fallback_vfs in self._fallback_vfs:  | 
871  | 
fallback_vfs.check()  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
872  | 
|
873  | 
def _check_add(self, key, lines, random_id, check_content):  | 
|
874  | 
"""check that version_id and lines are safe to add."""  | 
|
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
875  | 
version_id = key[-1]  | 
876  | 
if contains_whitespace(version_id):  | 
|
| 
3517.3.1
by Andrew Bennetts
 Fix error in error path.  | 
877  | 
raise InvalidRevisionId(version_id, self)  | 
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
878  | 
self.check_not_reserved_id(version_id)  | 
| 
3350.6.11
by Martin Pool
 Review cleanups and documentation from Robert's mail on 2080618  | 
879  | 
        # TODO: If random_id==False and the key is already present, we should
 | 
880  | 
        # probably check that the existing content is identical to what is
 | 
|
881  | 
        # being inserted, and otherwise raise an exception.  This would make
 | 
|
882  | 
        # the bundle code simpler.
 | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
883  | 
if check_content:  | 
884  | 
self._check_lines_not_unicode(lines)  | 
|
885  | 
self._check_lines_are_lines(lines)  | 
|
886  | 
||
887  | 
    def _check_header(self, key, line):
        # Split the raw header line and verify that it names the version we
        # expected (the last component of the key); return the parsed fields.
        rec = self._split_header(line)
        self._check_header_version(rec, key[-1])
        return rec
|
891  | 
||
892  | 
    def _check_header_version(self, rec, version_id):
        """Checks the header version on original format knit records.

        These have the last component of the key embedded in the record.

        :param rec: The split header fields; rec[1] is the embedded version.
        :param version_id: The version the caller expected to find.
        :raises KnitCorrupt: If the embedded version does not match.
        """
        if rec[1] != version_id:
            raise KnitCorrupt(self,
                'unexpected version, wanted %r, got %r' % (version_id, rec[1]))
|
900  | 
||
901  | 
    def _check_should_delta(self, parent):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext. It searches for _max_delta_chain parents. When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        full text.
        """
        delta_size = 0
        fulltext_size = None
        for count in xrange(self._max_delta_chain):
            # XXX: Collapse these two queries:
            try:
                # Note that this only looks in the index of this particular
                # KnitVersionedFiles, not in the fallbacks.  This ensures that
                # we won't store a delta spanning physical repository
                # boundaries.
                method = self._index.get_method(parent)
            except RevisionNotPresent:
                # Some basis is not locally present: always delta
                return False
            index, pos, size = self._index.get_position(parent)
            if method == 'fulltext':
                fulltext_size = size
                break
            delta_size += size
            # We don't explicitly check for presence because this is in an
            # inner loop, and if it's missing it'll fail anyhow.
            # TODO: This should be asking for compression parent, not graph
            # parent.
            parent = self._index.get_parent_map([parent])[parent][0]
        else:
            # We couldn't find a fulltext, so we must create a new one
            return False
        # Simple heuristic - if the total I/O would be greater as a delta than
        # the originally installed fulltext, we create a new fulltext.
        return fulltext_size > delta_size
| 
2147.1.1
by John Arbash Meinel
 Factor the common knit delta selection into a helper func, and allow the fulltext to be chosen based on cumulative delta size  | 
941  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
942  | 
    def _build_details_to_components(self, build_details):
        """Convert a build_details tuple to a position tuple."""
        # Reorders the index tuple (index_memo, compression_parent, parents,
        # record_details) into:
        # record_details, access_memo, compression_parent
        return build_details[3], build_details[0], build_details[1]
|
946  | 
||
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
947  | 
def _get_components_positions(self, keys, allow_missing=False):  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
948  | 
"""Produce a map of position data for the components of keys.  | 
949  | 
||
950  | 
        This data is intended to be used for retrieving the knit records.
 | 
|
951  | 
||
952  | 
        A dict of key to (record_details, index_memo, next, parents) is
 | 
|
953  | 
        returned.
 | 
|
954  | 
        method is the way referenced data should be applied.
 | 
|
955  | 
        index_memo is the handle to pass to the data access to actually get the
 | 
|
956  | 
            data
 | 
|
957  | 
        next is the build-parent of the version, or None for fulltexts.
 | 
|
958  | 
        parents is the version_ids of the parents of this version
 | 
|
959  | 
||
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
960  | 
        :param allow_missing: If True do not raise an error on a missing component,
 | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
961  | 
            just ignore it.
 | 
962  | 
        """
 | 
|
963  | 
component_data = {}  | 
|
964  | 
pending_components = keys  | 
|
965  | 
while pending_components:  | 
|
966  | 
build_details = self._index.get_build_details(pending_components)  | 
|
967  | 
current_components = set(pending_components)  | 
|
968  | 
pending_components = set()  | 
|
969  | 
for key, details in build_details.iteritems():  | 
|
970  | 
(index_memo, compression_parent, parents,  | 
|
971  | 
record_details) = details  | 
|
972  | 
method = record_details[0]  | 
|
973  | 
if compression_parent is not None:  | 
|
974  | 
pending_components.add(compression_parent)  | 
|
975  | 
component_data[key] = self._build_details_to_components(details)  | 
|
976  | 
missing = current_components.difference(build_details)  | 
|
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
977  | 
if missing and not allow_missing:  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
978  | 
raise errors.RevisionNotPresent(missing.pop(), self)  | 
979  | 
return component_data  | 
|
980  | 
||
981  | 
def _get_content(self, key, parent_texts={}):  | 
|
982  | 
"""Returns a content object that makes up the specified  | 
|
983  | 
        version."""
 | 
|
984  | 
cached_version = parent_texts.get(key, None)  | 
|
985  | 
if cached_version is not None:  | 
|
986  | 
            # Ensure the cache dict is valid.
 | 
|
987  | 
if not self.get_parent_map([key]):  | 
|
988  | 
raise RevisionNotPresent(key, self)  | 
|
989  | 
return cached_version  | 
|
990  | 
text_map, contents_map = self._get_content_maps([key])  | 
|
991  | 
return contents_map[key]  | 
|
992  | 
||
| 
3350.8.7
by Robert Collins
 get_record_stream for fulltexts working (but note extreme memory use!).  | 
993  | 
    def _get_content_maps(self, keys, nonlocal_keys=None):
        """Produce maps of text and KnitContents

        :param keys: The keys to produce content maps for.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :return: (text_map, content_map) where text_map contains the texts for
            the requested versions and content_map contains the KnitContents.
        """
        # FUTURE: This function could be improved for the 'extract many' case
        # by tracking each component and only doing the copy when the number of
        # children than need to apply delta's to it is > 1 or it is part of the
        # final output.
        keys = list(keys)
        multiple_versions = len(keys) != 1
        record_map = self._get_record_map(keys, allow_missing=True)

        text_map = {}
        content_map = {}
        final_content = {}
        if nonlocal_keys is None:
            nonlocal_keys = set()
        else:
            nonlocal_keys = frozenset(nonlocal_keys)
        # First satisfy the nonlocal keys from the fallback stores, stopping
        # as soon as nothing is left to find.
        missing_keys = set(nonlocal_keys)
        for source in self._fallback_vfs:
            if not missing_keys:
                break
            for record in source.get_record_stream(missing_keys,
                'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                missing_keys.remove(record.key)
                lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
                text_map[record.key] = lines
                content_map[record.key] = PlainKnitContent(lines, record.key)
                if record.key in keys:
                    final_content[record.key] = content_map[record.key]
        for key in keys:
            if key in nonlocal_keys:
                # already handled
                continue
            # Collect the chain of components (deltas back to a fulltext, or
            # to something already in content_map) needed to build this text.
            components = []
            cursor = key
            while cursor is not None:
                try:
                    record, record_details, digest, next = record_map[cursor]
                except KeyError:
                    raise RevisionNotPresent(cursor, self)
                components.append((cursor, record, record_details, digest))
                cursor = next
                if cursor in content_map:
                    # no need to plan further back
                    components.append((cursor, None, None, None))
                    break

            # Apply the chain oldest-first; each step layers a delta onto the
            # content built so far.
            content = None
            for (component_id, record, record_details,
                 digest) in reversed(components):
                if component_id in content_map:
                    content = content_map[component_id]
                else:
                    content, delta = self._factory.parse_record(key[-1],
                        record, record_details, content,
                        copy_base_content=multiple_versions)
                    if multiple_versions:
                        content_map[component_id] = content

            final_content[key] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            actual_sha = sha_strings(text)
            if actual_sha != digest:
                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
            text_map[key] = text
        return text_map, final_content
|
1071  | 
||
1072  | 
    def get_parent_map(self, keys):
        """Get a map of the graph parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        # Delegate to the stacking-aware lookup and discard the per-source
        # breakdown.
        return self._get_parent_map_with_sources(keys)[0]
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1080  | 
|
| 
3350.8.14
by Robert Collins
 Review feedback.  | 
1081  | 
def _get_parent_map_with_sources(self, keys):  | 
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1082  | 
"""Get a map of the parents of keys.  | 
1083  | 
||
1084  | 
        :param keys: The keys to look up parents for.
 | 
|
1085  | 
        :return: A tuple. The first element is a mapping from keys to parents.
 | 
|
1086  | 
            Absent keys are absent from the mapping. The second element is a
 | 
|
1087  | 
            list with the locations each key was found in. The first element
 | 
|
1088  | 
            is the in-this-knit parents, the second the first fallback source,
 | 
|
1089  | 
            and so on.
 | 
|
1090  | 
        """
 | 
|
| 
3350.8.2
by Robert Collins
 stacked get_parent_map.  | 
1091  | 
result = {}  | 
1092  | 
sources = [self._index] + self._fallback_vfs  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1093  | 
source_results = []  | 
| 
3350.8.2
by Robert Collins
 stacked get_parent_map.  | 
1094  | 
missing = set(keys)  | 
1095  | 
for source in sources:  | 
|
1096  | 
if not missing:  | 
|
1097  | 
                break
 | 
|
1098  | 
new_result = source.get_parent_map(missing)  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1099  | 
source_results.append(new_result)  | 
| 
3350.8.2
by Robert Collins
 stacked get_parent_map.  | 
1100  | 
result.update(new_result)  | 
1101  | 
missing.difference_update(set(new_result))  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1102  | 
return result, source_results  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1103  | 
|
| 
3350.8.3
by Robert Collins
 VF.get_sha1s needed changing to be stackable.  | 
1104  | 
    def _get_record_map(self, keys, allow_missing=False):
        """Produce a dictionary of knit records.

        :return: {key:(record, record_details, digest, next)}
            record
                data returned from read_records
            record_details
                opaque information to pass to parse_record
            digest
                SHA1 digest of the full text after all steps are done
            next
                build-parent of the version, i.e. the leftmost ancestor.
                Will be None if the record is not a delta.
        :param keys: The keys to build a map for
        :param allow_missing: If some records are missing, rather than
            error, just return the data that could be generated.
        """
        # This retries the whole request if anything fails. Potentially we
        # could be a bit more selective. We could track the keys whose records
        # we have successfully found, and then only request the new records
        # from there. However, _get_components_positions grabs the whole build
        # chain, which means we'll likely try to grab the same records again
        # anyway. Also, can the build chains change as part of a pack
        # operation? We wouldn't want to end up with a broken chain.
        while True:
            try:
                position_map = self._get_components_positions(keys,
                    allow_missing=allow_missing)
                # key = component_id, r = record_details, i_m = index_memo,
                # n = next
                records = [(key, i_m) for key, (r, i_m, n)
                                       in position_map.iteritems()]
                record_map = {}
                for key, record, digest in self._read_records_iter(records):
                    (record_details, index_memo, next) = position_map[key]
                    record_map[key] = record, record_details, digest, next
                return record_map
            except errors.RetryWithNewPacks, e:
                # The pack collection changed underneath us; reload the index
                # (or re-raise if reloading cannot help) and try again.
                self._access.reload_or_raise(e)
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1143  | 
|
| 
3763.4.1
by John Arbash Meinel
 Possible fix for bug #269456.  | 
1144  | 
def _split_by_prefix(self, keys):  | 
1145  | 
"""For the given keys, split them up based on their prefix.  | 
|
1146  | 
||
1147  | 
        To keep memory pressure somewhat under control, split the
 | 
|
1148  | 
        requests back into per-file-id requests, otherwise "bzr co"
 | 
|
1149  | 
        extracts the full tree into memory before writing it to disk.
 | 
|
1150  | 
        This should be revisited if _get_content_maps() can ever cross
 | 
|
1151  | 
        file-id boundaries.
 | 
|
1152  | 
||
1153  | 
        :param keys: An iterable of key tuples
 | 
|
1154  | 
        :return: A dict of {prefix: [key_list]}
 | 
|
1155  | 
        """
 | 
|
1156  | 
split_by_prefix = {}  | 
|
1157  | 
for key in keys:  | 
|
1158  | 
if len(key) == 1:  | 
|
1159  | 
split_by_prefix.setdefault('', []).append(key)  | 
|
1160  | 
else:  | 
|
1161  | 
split_by_prefix.setdefault(key[0], []).append(key)  | 
|
1162  | 
return split_by_prefix  | 
|
1163  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1164  | 
    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        # keys might be a generator
        keys = set(keys)
        if not keys:
            return
        if not self._index.has_graph:
            # Cannot topological order when no graph has been stored.
            ordering = 'unordered'

        # Retry loop: if the underlying packs are repacked while we stream,
        # restart with only the keys that have not yet been yielded.
        remaining_keys = keys
        while True:
            try:
                keys = set(remaining_keys)
                for content_factory in self._get_remaining_record_stream(keys,
                    ordering, include_delta_closure):
                    remaining_keys.discard(content_factory.key)
                    yield content_factory
                return
            except errors.RetryWithNewPacks, e:
                # Reload the pack indices (or re-raise if that cannot help)
                # before retrying the remaining keys.
                self._access.reload_or_raise(e)
| 
3789.2.1
by John Arbash Meinel
 _DirectPackAccess can now raise RetryWithNewPacks when we think something has happened.  | 
1195  | 
|
1196  | 
def _get_remaining_record_stream(self, keys, ordering,  | 
|
1197  | 
include_delta_closure):  | 
|
| 
3789.2.4
by John Arbash Meinel
 Add a multiple-record test, though it isn't quite what we want for the readv tests.  | 
1198  | 
"""This function is the 'retry' portion for get_record_stream."""  | 
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
1199  | 
if include_delta_closure:  | 
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
1200  | 
positions = self._get_components_positions(keys, allow_missing=True)  | 
| 
3350.3.3
by Robert Collins
 Functional get_record_stream interface tests covering full interface.  | 
1201  | 
else:  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1202  | 
build_details = self._index.get_build_details(keys)  | 
| 
3350.6.11
by Martin Pool
 Review cleanups and documentation from Robert's mail on 2080618  | 
1203  | 
            # map from key to
 | 
1204  | 
            # (record_details, access_memo, compression_parent_key)
 | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1205  | 
positions = dict((key, self._build_details_to_components(details))  | 
1206  | 
for key, details in build_details.iteritems())  | 
|
1207  | 
absent_keys = keys.difference(set(positions))  | 
|
1208  | 
        # There may be more absent keys : if we're missing the basis component
 | 
|
1209  | 
        # and are trying to include the delta closure.
 | 
|
1210  | 
if include_delta_closure:  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1211  | 
needed_from_fallback = set()  | 
| 
3350.6.11
by Martin Pool
 Review cleanups and documentation from Robert's mail on 2080618  | 
1212  | 
            # Build up reconstructable_keys dict.  key:True in this dict means
 | 
1213  | 
            # the key can be reconstructed.
 | 
|
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
1214  | 
reconstructable_keys = {}  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1215  | 
for key in keys:  | 
1216  | 
                # the delta chain
 | 
|
1217  | 
try:  | 
|
1218  | 
chain = [key, positions[key][2]]  | 
|
1219  | 
except KeyError:  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1220  | 
needed_from_fallback.add(key)  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1221  | 
                    continue
 | 
1222  | 
result = True  | 
|
1223  | 
while chain[-1] is not None:  | 
|
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
1224  | 
if chain[-1] in reconstructable_keys:  | 
1225  | 
result = reconstructable_keys[chain[-1]]  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1226  | 
                        break
 | 
1227  | 
else:  | 
|
1228  | 
try:  | 
|
1229  | 
chain.append(positions[chain[-1]][2])  | 
|
1230  | 
except KeyError:  | 
|
1231  | 
                            # missing basis component
 | 
|
| 
3350.8.10
by Robert Collins
 Stacked insert_record_stream.  | 
1232  | 
needed_from_fallback.add(chain[-1])  | 
1233  | 
result = True  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1234  | 
                            break
 | 
1235  | 
for chain_key in chain[:-1]:  | 
|
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
1236  | 
reconstructable_keys[chain_key] = result  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1237  | 
if not result:  | 
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1238  | 
needed_from_fallback.add(key)  | 
1239  | 
        # Double index lookups here : need a unified api ?
 | 
|
| 
3350.8.14
by Robert Collins
 Review feedback.  | 
1240  | 
global_map, parent_maps = self._get_parent_map_with_sources(keys)  | 
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1241  | 
if ordering == 'topological':  | 
1242  | 
            # Global topological sort
 | 
|
| 
3535.5.1
by John Arbash Meinel
 cleanup a few imports to be lazily loaded.  | 
1243  | 
present_keys = tsort.topo_sort(global_map)  | 
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1244  | 
            # Now group by source:
 | 
1245  | 
source_keys = []  | 
|
1246  | 
current_source = None  | 
|
1247  | 
for key in present_keys:  | 
|
1248  | 
for parent_map in parent_maps:  | 
|
1249  | 
if key in parent_map:  | 
|
1250  | 
key_source = parent_map  | 
|
1251  | 
                        break
 | 
|
1252  | 
if current_source is not key_source:  | 
|
1253  | 
source_keys.append((key_source, []))  | 
|
1254  | 
current_source = key_source  | 
|
1255  | 
source_keys[-1][1].append(key)  | 
|
1256  | 
else:  | 
|
| 
3606.7.7
by John Arbash Meinel
 Add tests for the fetching behavior.  | 
1257  | 
if ordering != 'unordered':  | 
1258  | 
raise AssertionError('valid values for ordering are:'  | 
|
1259  | 
' "unordered" or "topological" not: %r'  | 
|
1260  | 
% (ordering,))  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1261  | 
            # Just group by source; remote sources first.
 | 
1262  | 
present_keys = []  | 
|
1263  | 
source_keys = []  | 
|
1264  | 
for parent_map in reversed(parent_maps):  | 
|
1265  | 
source_keys.append((parent_map, []))  | 
|
1266  | 
for key in parent_map:  | 
|
1267  | 
present_keys.append(key)  | 
|
1268  | 
source_keys[-1][1].append(key)  | 
|
| 
3878.1.1
by John Arbash Meinel
 KVF.get_record_stream('unordered') now returns the records based on I/O ordering.  | 
1269  | 
            # We have been requested to return these records in an order that
 | 
| 
3878.1.2
by John Arbash Meinel
 Move the sorting into each index, and customize it for Kndx access.  | 
1270  | 
            # suits us. So we ask the index to give us an optimally sorted
 | 
1271  | 
            # order.
 | 
|
| 
3878.1.1
by John Arbash Meinel
 KVF.get_record_stream('unordered') now returns the records based on I/O ordering.  | 
1272  | 
for source, sub_keys in source_keys:  | 
1273  | 
if source is parent_maps[0]:  | 
|
| 
3878.1.2
by John Arbash Meinel
 Move the sorting into each index, and customize it for Kndx access.  | 
1274  | 
                    # Only sort the keys for this VF
 | 
1275  | 
self._index._sort_keys_by_io(sub_keys, positions)  | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1276  | 
absent_keys = keys - set(global_map)  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1277  | 
for key in absent_keys:  | 
1278  | 
yield AbsentContentFactory(key)  | 
|
1279  | 
        # restrict our view to the keys we can answer.
 | 
|
1280  | 
        # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
 | 
|
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1281  | 
        # XXX: At that point we need to consider the impact of double reads by
 | 
1282  | 
        # utilising components multiple times.
 | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1283  | 
if include_delta_closure:  | 
1284  | 
            # XXX: get_content_maps performs its own index queries; allow state
 | 
|
1285  | 
            # to be passed in.
 | 
|
| 
3763.4.1
by John Arbash Meinel
 Possible fix for bug #269456.  | 
1286  | 
non_local_keys = needed_from_fallback - absent_keys  | 
1287  | 
prefix_split_keys = self._split_by_prefix(present_keys)  | 
|
1288  | 
prefix_split_non_local_keys = self._split_by_prefix(non_local_keys)  | 
|
1289  | 
for prefix, keys in prefix_split_keys.iteritems():  | 
|
1290  | 
non_local = prefix_split_non_local_keys.get(prefix, [])  | 
|
1291  | 
non_local = set(non_local)  | 
|
1292  | 
text_map, _ = self._get_content_maps(keys, non_local)  | 
|
1293  | 
for key in keys:  | 
|
1294  | 
lines = text_map.pop(key)  | 
|
| 
3890.2.1
by John Arbash Meinel
 Start working on a ChunkedContentFactory.  | 
1295  | 
yield ChunkedContentFactory(key, global_map[key], None,  | 
1296  | 
lines)  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1297  | 
else:  | 
| 
3350.8.6
by Robert Collins
 get_record_stream stacking for delta access.  | 
1298  | 
for source, keys in source_keys:  | 
1299  | 
if source is parent_maps[0]:  | 
|
1300  | 
                    # this KnitVersionedFiles
 | 
|
1301  | 
records = [(key, positions[key][1]) for key in keys]  | 
|
1302  | 
for key, raw_data, sha1 in self._read_records_iter_raw(records):  | 
|
1303  | 
(record_details, index_memo, _) = positions[key]  | 
|
1304  | 
yield KnitContentFactory(key, global_map[key],  | 
|
1305  | 
record_details, sha1, raw_data, self._factory.annotated, None)  | 
|
1306  | 
else:  | 
|
1307  | 
vf = self._fallback_vfs[parent_maps.index(source) - 1]  | 
|
1308  | 
for record in vf.get_record_stream(keys, ordering,  | 
|
1309  | 
include_delta_closure):  | 
|
1310  | 
yield record  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1311  | 
|
1312  | 
def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s()."""
    # Keys for which no digest has been found yet; shrinks as each
    # source (local store, then fallbacks) answers.
    unanswered = set(keys)
    sha1s = {}
    # Consult our own record index first.
    local_records = self._get_record_map(unanswered, allow_missing=True)
    for record_key, record_details in local_records.iteritems():
        if record_key in unanswered:
            # record entry 2 is the 'digest'.
            sha1s[record_key] = record_details[2]
    unanswered.difference_update(set(sha1s))
    # Ask each fallback in turn for whatever is still unanswered,
    # stopping early once everything has been resolved.
    for fallback in self._fallback_vfs:
        if not unanswered:
            break
        fallback_answers = fallback.get_sha1s(unanswered)
        sha1s.update(fallback_answers)
        unanswered.difference_update(set(fallback_answers))
    return sha1s
|
| 
3052.2.2
by Robert Collins
 * Operations pulling data from a smart server where the underlying  | 
1330  | 
|
| 
3350.3.8
by Robert Collins
 Basic stream insertion, no fast path yet for knit to knit.  | 
1331  | 
def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: If a record in the stream has
        storage_kind 'absent'.
    :raises errors.BzrCheckError: If, after the whole stream is consumed,
        some buffered delta records still lack their compression parent.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Lazily build and cache one adapter per (source, target) kind.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # Storage kinds that are deltas (and therefore have a compression
    # parent in parents[0]).
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                # Need an adapter: prefer keeping the record as a delta,
                # fall back to adapting it to a fulltext.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind))
            else:
                bytes = record.get_bytes_as(record.storage_kind)
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            buffered = False
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        elif record.storage_kind == 'fulltext':
            self.add_lines(record.key, parents,
                split_lines(record.get_bytes_as('fulltext')))
        else:
            # Not a fulltext, and not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            lines = split_lines(adapter.get_bytes(
                record, record.get_bytes_as(record.storage_kind)))
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                pass
        # Add any records whose basis parent is now available.
        added_keys = [record.key]
        while added_keys:
            key = added_keys.pop(0)
            if key in buffered_index_entries:
                index_entries = buffered_index_entries[key]
                self._index.add_records(index_entries)
                # Newly indexed keys may themselves unblock further
                # buffered entries, so keep draining transitively.
                added_keys.extend(
                    [index_entry[0] for index_entry in index_entries])
                del buffered_index_entries[key]
    # If there were any deltas which had a missing basis parent, error.
    if buffered_index_entries:
        from pprint import pformat
        raise errors.BzrCheckError(
            "record_stream refers to compression parents not in %r:\n%s"
            % (self, pformat(sorted(buffered_index_entries.keys()))))
|
| 
3350.3.8
by Robert Collins
 Basic stream insertion, no fast path yet for knit to knit.  | 
1485  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1486  | 
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: Iterable of keys to walk; consumed into a set.
    :param pb: Optional progress bar; a DummyProgress is used when None.
    :return: An iterator over (line, key).
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    # Retry loop: if the underlying packs are repacked while we read,
    # _read_records_iter raises RetryWithNewPacks; reload_or_raise either
    # refreshes the pack view (and we loop again over the keys not yet
    # yielded -- they were removed from `keys` just before yielding) or
    # re-raises.
    done = False
    while not done:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            key_records = []
            build_details = self._index.get_build_details(keys)
            for key, details in build_details.iteritems():
                if key in keys:
                    # details[0] is the index_memo used for raw access.
                    key_records.append((key, details[0]))
            records_iter = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in records_iter:
                pb.update('Walking content.', key_idx, total)
                # details[1] is the compression parent; None means the
                # record is stored as a fulltext, otherwise a line delta.
                compression_parent = build_details[key][1]
                if compression_parent is None:
                    # fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta 
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            done = True
        except errors.RetryWithNewPacks, e:
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._fallback_vfs:
        if not keys:
            break
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        # Only keys the fallback actually mentioned are considered found.
        keys.difference_update(source_keys)
    pb.update('Walking content.', total, total)
def _make_line_delta(self, delta_seq, new_content):  | 
|
1565  | 
"""Generate a line delta from delta_seq and new_content."""  | 
|
1566  | 
diff_hunks = []  | 
|
1567  | 
for op in delta_seq.get_opcodes():  | 
|
1568  | 
if op[0] == 'equal':  | 
|
1569  | 
                continue
 | 
|
1570  | 
diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))  | 
|
1571  | 
return diff_hunks  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1572  | 
|
| 
1596.2.34
by Robert Collins
 Optimise knit add to only diff once per parent, not once per parent + once for the delta generation.  | 
1573  | 
def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content and generate deltas.

    This is done by comparing the annotations based on changes to the text
    and generating a delta on the resulting full texts. If annotations are
    not being created then a simple delta is created.

    :param content: The content object being merged; its _lines are
        mutated in place when annotating.
    :param parents: Parent keys; parents[0] is the delta basis.
    :param parent_texts: Optional cache of parent content objects.
        NOTE(review): mutable default is only read here, never mutated.
    :param delta: If true, return a line delta against parents[0].
    :param annotated: If true, copy (origin, text) annotations across
        from each parent for matching lines.
    :param left_matching_blocks: Pre-computed matching blocks against
        parents[0], used to avoid a redundant diff.
    """
    if left_matching_blocks is not None:
        # Reuse the caller-supplied match against the left (first) parent.
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_key in parents:
            merge_content = self._get_content(parent_key, parent_texts)
            if (parent_key == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this copies (origin, text) pairs across to the new
                # content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
        # XXX: Robert says the following block is a workaround for a
        # now-fixed bug and it can probably be deleted. -- mbp 20080618
        if content._lines and content._lines[-1][1][-1] != '\n':
            # The copied annotation was from a line without a trailing EOL,
            # reinstate one for the content object, to ensure correct
            # serialization.
            line = content._lines[-1][1] + '\n'
            content._lines[-1] = (content._lines[-1][0], line)
    if delta:
        if delta_seq is None:
            # No precomputed match available: diff against the first
            # parent's full text.
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1618  | 
def _parse_record(self, version_id, data):  | 
1619  | 
"""Parse an original format knit record.  | 
|
1620  | 
||
1621  | 
        These have the last element of the key only present in the stored data.
 | 
|
1622  | 
        """
 | 
|
1623  | 
rec, record_contents = self._parse_record_unchecked(data)  | 
|
1624  | 
self._check_header_version(rec, version_id)  | 
|
1625  | 
return record_contents, rec[3]  | 
|
1626  | 
||
1627  | 
def _parse_record_header(self, key, raw_data):  | 
|
1628  | 
"""Parse a record header for consistency.  | 
|
1629  | 
||
1630  | 
        :return: the header and the decompressor stream.
 | 
|
1631  | 
                 as (stream, header_record)
 | 
|
1632  | 
        """
 | 
|
| 
3535.5.1
by John Arbash Meinel
 cleanup a few imports to be lazily loaded.  | 
1633  | 
df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1634  | 
try:  | 
1635  | 
            # Current serialise
 | 
|
1636  | 
rec = self._check_header(key, df.readline())  | 
|
1637  | 
except Exception, e:  | 
|
1638  | 
raise KnitCorrupt(self,  | 
|
1639  | 
"While reading {%s} got %s(%s)"  | 
|
1640  | 
% (key, e.__class__.__name__, str(e)))  | 
|
1641  | 
return df, rec  | 
|
1642  | 
||
1643  | 
def _parse_record_unchecked(self, data):  | 
|
1644  | 
        # profiling notes:
 | 
|
1645  | 
        # 4168 calls in 2880 217 internal
 | 
|
1646  | 
        # 4168 calls to _parse_record_header in 2121
 | 
|
1647  | 
        # 4168 calls to readlines in 330
 | 
|
| 
3535.5.1
by John Arbash Meinel
 cleanup a few imports to be lazily loaded.  | 
1648  | 
df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1649  | 
try:  | 
1650  | 
record_contents = df.readlines()  | 
|
1651  | 
except Exception, e:  | 
|
1652  | 
raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %  | 
|
1653  | 
(data, e.__class__.__name__, str(e)))  | 
|
1654  | 
header = record_contents.pop(0)  | 
|
1655  | 
rec = self._split_header(header)  | 
|
1656  | 
last_line = record_contents.pop()  | 
|
1657  | 
if len(record_contents) != int(rec[2]):  | 
|
1658  | 
raise KnitCorrupt(self,  | 
|
1659  | 
'incorrect number of lines %s != %s'  | 
|
1660  | 
' for version {%s} %s'  | 
|
1661  | 
% (len(record_contents), int(rec[2]),  | 
|
1662  | 
rec[1], record_contents))  | 
|
1663  | 
if last_line != 'end %s\n' % rec[1]:  | 
|
1664  | 
raise KnitCorrupt(self,  | 
|
1665  | 
'unexpected version end line %r, wanted %r'  | 
|
1666  | 
% (last_line, rec[1]))  | 
|
1667  | 
df.close()  | 
|
1668  | 
return rec, record_contents  | 
|
1669  | 
||
1670  | 
def _read_records_iter(self, records):
    """Read text records from data file and yield result.

    The result will be returned in whatever is the fastest to read.
    Not by the order requested. Also, multiple requests for the same
    record will only yield 1 response.

    :param records: A list of (key, access_memo) entries
    :return: Yields (key, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    # XXX: This smells wrong, IO may not be getting ordered right.
    # Deduplicate, then order by access memo so the read below can
    # coalesce adjacent ranges.
    unique_records = sorted(set(records), key=operator.itemgetter(1))
    if not unique_records:
        return

    # The transport optimizes the fetching as well 
    # (ie, reads continuous ranges.)
    raw_chunks = self._access.get_raw_records(
        [memo for _unused_key, memo in unique_records])

    for (key, memo), chunk in izip(iter(unique_records), raw_chunks):
        text_content, digest = self._parse_record(key[-1], chunk)
        yield key, text_content, digest
1698  | 
def _read_records_iter_raw(self, records):  | 
|
1699  | 
"""Read text records from data file and yield raw data.  | 
|
1700  | 
||
1701  | 
        This unpacks enough of the text record to validate the id is
 | 
|
1702  | 
        as expected but thats all.
 | 
|
1703  | 
||
1704  | 
        Each item the iterator yields is (key, bytes, sha1_of_full_text).
 | 
|
1705  | 
        """
 | 
|
1706  | 
        # setup an iterator of the external records:
 | 
|
1707  | 
        # uses readv so nice and fast we hope.
 | 
|
1708  | 
if len(records):  | 
|
1709  | 
            # grab the disk data needed.
 | 
|
1710  | 
needed_offsets = [index_memo for key, index_memo  | 
|
1711  | 
in records]  | 
|
1712  | 
raw_records = self._access.get_raw_records(needed_offsets)  | 
|
1713  | 
||
1714  | 
for key, index_memo in records:  | 
|
1715  | 
data = raw_records.next()  | 
|
1716  | 
            # validate the header (note that we can only use the suffix in
 | 
|
1717  | 
            # current knit records).
 | 
|
1718  | 
df, rec = self._parse_record_header(key, data)  | 
|
1719  | 
df.close()  | 
|
1720  | 
yield key, data, rec[3]  | 
|
1721  | 
||
1722  | 
def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Convert key, digest, lines into a raw data block.

    :param key: The key of the record. Currently keys are always serialised
        using just the trailing component.
    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in \n,
        dense_lines may be a list with one line in it, containing all the
        1000's lines and their \n's. Using dense_lines if it is already
        known is a win because the string join to create bytes in this
        function spends less time resizing the final string.
    :return: (len, the gzip-compressed record bytes).
    """
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    pieces = ["version %s %d %s\n" % (key[-1],
                                      len(lines),
                                      digest)]
    pieces.extend(dense_lines or lines)
    pieces.append("end %s\n" % key[-1])
    bytes = ''.join(pieces)
    if type(bytes) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(bytes))
    if lines and lines[-1][-1] != '\n':
        raise ValueError('corrupt lines value %r' % lines)
    compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)
    return len(compressed_bytes), compressed_bytes
1752  | 
def _split_header(self, line):  | 
|
1753  | 
rec = line.split()  | 
|
1754  | 
if len(rec) != 4:  | 
|
1755  | 
raise KnitCorrupt(self,  | 
|
1756  | 
'unexpected number of elements in record header')  | 
|
1757  | 
return rec  | 
|
1758  | 
||
1759  | 
def keys(self):
    """See VersionedFiles.keys."""
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    # Union the local index's keys with every stacked fallback's keys.
    result = set()
    for source in [self._index] + self._fallback_vfs:
        result.update(source.keys())
    return result
1768  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1769  | 
|
1770  | 
class _KndxIndex(object):  | 
|
1771  | 
"""Manages knit index files  | 
|
1772  | 
||
| 
3350.6.10
by Martin Pool
 VersionedFiles review cleanups  | 
1773  | 
    The index is kept in memory and read on startup, to enable
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1774  | 
    fast lookups of revision information.  The cursor of the index
 | 
1775  | 
    file is always pointing to the end, making it easy to append
 | 
|
1776  | 
    entries.
 | 
|
1777  | 
||
1778  | 
    _cache is a cache for fast mapping from version id to a Index
 | 
|
1779  | 
    object.
 | 
|
1780  | 
||
1781  | 
    _history is a cache for fast mapping from indexes to version ids.
 | 
|
1782  | 
||
1783  | 
    The index data format is dictionary compressed when it comes to
 | 
|
1784  | 
    parent references; an index entry may only have parents with a
 | 
|
1785  | 
    lower index number.  As a result, the index is topologically sorted.
 | 
|
| 
1563.2.11
by Robert Collins
 Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis.  | 
1786  | 
|
1787  | 
    Duplicate entries may be written to the index for a single version id
 | 
|
1788  | 
    if this is done then the latter one completely replaces the former:
 | 
|
1789  | 
    this allows updates to correct version and parent information. 
 | 
|
1790  | 
    Note that the two entries may share the delta, and that successive
 | 
|
1791  | 
    annotations and references MUST point to the first entry.
 | 
|
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
1792  | 
|
1793  | 
    The index file on disc contains a header, followed by one line per knit
 | 
|
1794  | 
    record. The same revision can be present in an index file more than once.
 | 
|
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
1795  | 
    The first occurrence gets assigned a sequence number starting from 0. 
 | 
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
1796  | 
    
 | 
1797  | 
    The format of a single line is
 | 
|
1798  | 
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
 | 
|
1799  | 
    REVISION_ID is a utf8-encoded revision id
 | 
|
1800  | 
    FLAGS is a comma separated list of flags about the record. Values include 
 | 
|
1801  | 
        no-eol, line-delta, fulltext.
 | 
|
1802  | 
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
 | 
|
1803  | 
        that the compressed data starts at.
 | 
|
1804  | 
    LENGTH is the ascii representation of the length of the data file.
 | 
|
1805  | 
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
 | 
|
1806  | 
        REVISION_ID.
 | 
|
1807  | 
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
 | 
|
1808  | 
        revision id already in the knit that is a parent of REVISION_ID.
 | 
|
1809  | 
    The ' :' marker is the end of record marker.
 | 
|
1810  | 
    
 | 
|
1811  | 
    partial writes:
 | 
|
| 
2158.3.1
by Dmitry Vasiliev
 KnitIndex tests/fixes/optimizations  | 
1812  | 
    when a write is interrupted to the index file, it will result in a line
 | 
1813  | 
    that does not end in ' :'. If the ' :' is not present at the end of a line,
 | 
|
1814  | 
    or at the end of the file, then the record that is missing it will be
 | 
|
1815  | 
    ignored by the parser.
 | 
|
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
1816  | 
|
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
1817  | 
    When writing new records to the index file, the data is preceded by '\n'
 | 
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
1818  | 
    to ensure that records always start on new lines even if the last write was
 | 
1819  | 
    interrupted. As a result its normal for the last line in the index to be
 | 
|
1820  | 
    missing a trailing newline. One can be added with no harmful effects.
 | 
|
| 
3350.6.11
by Martin Pool
 Review cleanups and documentation from Robert's mail on 2080618  | 
1821  | 
|
1822  | 
    :ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,
 | 
|
1823  | 
        where prefix is e.g. the (fileid,) for .texts instances or () for
 | 
|
1824  | 
        constant-mapped things like .revisions, and the old state is
 | 
|
1825  | 
        tuple(history_vector, cache_dict).  This is used to prevent having an
 | 
|
1826  | 
        ABI change with the C extension that reads .kndx files.
 | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1827  | 
    """
 | 
1828  | 
||
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
1829  | 
HEADER = "# bzr knit index 8\n"  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1830  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1831  | 
def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    :param transport: Transport used to read/append the .kndx files.
    :param mapper: Maps keys to relative paths (used by add_records to
        derive the '.kndx' file for each record's prefix).
    :param get_scope: Callable returning the current scope; a changed
        scope invalidates cached state (see _check_read/_check_write_ok).
    :param allow_writes: presumably a callable consulted by _reset_cache
        to decide the read/write mode -- _reset_cache is defined outside
        this view; TODO confirm.
    :param is_locked: Callable used by the read/write guard methods.
    """
    self._transport = transport
    self._mapper = mapper
    self._get_scope = get_scope
    self._allow_writes = allow_writes
    self._is_locked = is_locked
    # NOTE(review): _reset_cache likely reads the attributes assigned
    # above, so it must stay after them -- confirm against its definition.
    self._reset_cache()
    self.has_graph = True
1841  | 
def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    """
    # Group the records by the .kndx file they belong to, keyed by path;
    # each value is (prefix, [records for that prefix]).
    paths = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        path_keys = paths.setdefault(path, (prefix, []))
        path_keys[1].append(record)
    for path in sorted(paths):
        prefix, path_keys = paths[path]
        self._load_prefixes([prefix])
        lines = []
        # Snapshot the cached state so we can roll back if the write
        # below fails part-way.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_keys:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                # Leading '\n' guards against a previously interrupted
                # write leaving a partial line at the end of the file.
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(orig_history):
                # Existing index: append the new entries.
                self._transport.append_bytes(path, ''.join(lines))
            else:
                # No entries yet: create the file with its header.
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise
1885  | 
def _cache_key(self, key, options, pos, size, parent_keys):
    """Cache a version record in the history array and index cache.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
     indexes).

    :param key: The record key; key[:-1] is the prefix, key[-1] the
        version id stored in the per-prefix cache.
    :param parent_keys: Parent keys; all must share this key's prefix.
    :raises ValueError: if a parent has a different prefix.
    """
    prefix = key[:-1]
    version_id = key[-1]
    # last-element only for compatibility with the C load_data.
    parents = tuple(parent[-1] for parent in parent_keys)
    for parent in parent_keys:
        if parent[:-1] != prefix:
            raise ValueError("mismatched prefixes for %r, %r" % (
                key, parent_keys))
    cache, history = self._kndx_cache[prefix]
    # only want the _history index to reference the 1st index entry
    # for version_id
    if version_id not in cache:
        index = len(history)
        history.append(version_id)
    else:
        # Re-adding an existing version keeps its original history slot.
        index = cache[version_id][5]
    cache[version_id] = (version_id,
                         options,
                         pos,
                         size,
                         parents,
                         index)
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1915  | 
def check_header(self, fp):
    """Read the first line of fp and verify it is the kndx header.

    :raises NoSuchFile: if the file is empty (treated as not existing).
    :raises KnitHeaderError: if the first line is not the expected header.
    """
    first_line = fp.readline()
    if not first_line:
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    if first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)
1924  | 
def _check_read(self):  | 
|
1925  | 
if not self._is_locked():  | 
|
1926  | 
raise errors.ObjectNotLocked(self)  | 
|
1927  | 
if self._get_scope() != self._scope:  | 
|
1928  | 
self._reset_cache()  | 
|
1929  | 
||
| 
3316.2.3
by Robert Collins
 Remove manual notification of transaction finishing on versioned files.  | 
1930  | 
def _check_write_ok(self):  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1931  | 
"""Assert if not writes are permitted."""  | 
1932  | 
if not self._is_locked():  | 
|
1933  | 
raise errors.ObjectNotLocked(self)  | 
|
| 
3316.2.5
by Robert Collins
 Review feedback.  | 
1934  | 
if self._get_scope() != self._scope:  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1935  | 
self._reset_cache()  | 
| 
3316.2.3
by Robert Collins
 Remove manual notification of transaction finishing on versioned files.  | 
1936  | 
if self._mode != 'w':  | 
1937  | 
raise errors.ReadOnlyObjectDirtiedError(self)  | 
|
1938  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
1939  | 
def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed
            to Factory.parse_record
    """
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            # Absent from the parent map means a ghost: skip it.
            continue
        parents = parent_map[key]
        method = self.get_method(key)
        if method == 'fulltext':
            compression_parent = None
        else:
            # Deltas are always built against the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent, parents,
                       (method, noeol))
    return result
def get_method(self, key):
    """Return compression method of specified key.

    :raises KnitIndexUnknownMethod: if the stored options name neither
        known storage method.
    """
    options = self.get_options(key)
    # 'fulltext' wins if both markers are somehow present.
    for candidate in ('fulltext', 'line-delta'):
        if candidate in options:
            return candidate
    raise errors.KnitIndexUnknownMethod(self, options)
def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :raises RevisionNotPresent: if the key is not in the index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        # Cache layout: _kndx_cache[prefix] -> (entry_dict, history);
        # entry field 1 holds the options list.
        return self._kndx_cache[prefix][0][suffix][1]
    except KeyError:
        raise RevisionNotPresent(key, self)
def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Parse what we need to up front; this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    self._load_prefixes(set(key[:-1] for key in keys))
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            # Entry field 4 is the list of parent suffixes.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Missing key: leave it out of the result (ghost).
            pass
        else:
            result[key] = tuple(prefix + (parent_suffix,)
                                for parent_suffix in suffix_parents)
    return result
def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    entry = self._kndx_cache[prefix][0][suffix]
    # Entry fields 2 and 3 are byte position and size within the .knit.
    return key, entry[2], entry[3]
# Answer key-presence queries via the shared helper that consults
# get_parent_map, avoiding a second per-index implementation.
has_key = _mod_index._has_key_from_parent_map
def _init_index(self, path, extra_lines=()):
    """Initialize an index file: write the header plus any extra lines.

    :param path: Transport-relative path of the index file to create.
    :param extra_lines: Optional iterable of lines appended after the
        header.  The default is an immutable () rather than the original
        mutable [] (a shared-default hazard); writelines accepts any
        iterable, so callers are unaffected.
    """
    sio = StringIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    self._transport.put_file_non_atomic(path, sio,
        create_parent_dir=True)
    # NOTE(review): file/dir modes are not applied here; the original had
    # mode=self._file_mode / dir_mode=self._dir_mode commented out.
def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.
    """
    # Identify all key prefixes, then read the cached kndx data for each.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        # A constant mapper means a single, unprefixed index.
        prefixes = [()]
    else:
        relpaths = set(os.path.splitext(quoted_relpath)[0]
            for quoted_relpath in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    result = set()
    for prefix in prefixes:
        for suffix in self._kndx_cache[prefix][1]:
            result.add(prefix + (suffix,))
    return result
def _load_prefixes(self, prefixes):
    """Load the indices for prefixes not already in _kndx_cache."""
    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # The _load_data interface writes into these attributes.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            path = self._mapper.map(prefix) + '.kndx'
            fp = self._transport.get(path)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
            self._kndx_cache[prefix] = (self._cache, self._history)
            del self._cache
            del self._filename
            del self._history
        except NoSuchFile:
            # Missing index: cache it as empty.
            self._kndx_cache[prefix] = ({}, [])
            if type(self._mapper) == ConstantMapper:
                # preserve behaviour for revisions.kndx etc.
                self._init_index(path)
            del self._cache
            del self._filename
            del self._history
# Compute which of a set of keys are absent, via the shared helper that
# consults get_parent_map.
missing_keys = _mod_index._missing_keys_from_parent_map
def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list."""
    result = {}
    for key in keys:
        # Group each key's last element under its (possibly empty) prefix.
        result.setdefault(key[:-1], []).append(key[-1])
    return result
def _dictionary_compress(self, keys):
    """Dictionary compress keys.

    :param keys: The keys to generate references to.
    :return: A string representation of keys. keys which are present are
        dictionary compressed, and others are emitted as fulltext with a
        '.' prefix.
    :raises ValueError: if the keys span more than one prefix.
    """
    if not keys:
        return ''
    encoded = []
    prefix = keys[0][:-1]
    cache = self._kndx_cache[prefix][0]
    for key in keys:
        if key[:-1] != prefix:
            # kndx indices cannot refer across partitioned storage.
            raise ValueError("mismatched prefixes for %r" % keys)
        suffix = key[-1]
        if suffix in cache:
            # -- inlined lookup() --
            # Present: emit the cached index number (entry field 5).
            encoded.append(str(cache[suffix][5]))
            # -- end lookup () --
        else:
            # Absent: emit as fulltext reference with a '.' marker.
            encoded.append('.' + suffix)
    return ' '.join(encoded)
def _reset_cache(self):
    """Drop parsed kndx state and re-derive scope and access mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    if self._allow_writes():
        self._mode = 'w'
    else:
        self._mode = 'r'
def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def io_order(key):
        index_memo = positions[key][1]
        # Group by prefix and position. index_memo[0] is the key, so it is
        # (file_id, revision_id) and we don't want to sort on revision_id;
        # index_memo[1] is the position, and index_memo[2] is the size,
        # which doesn't matter for the sort.
        return index_memo[0][:-1], index_memo[1]
    keys.sort(key=io_order)
def _split_key(self, key):
    """Split key into a prefix and suffix.

    :return: A (prefix, suffix) pair: all-but-last elements, last element.
    """
    prefix, suffix = key[:-1], key[-1]
    return prefix, suffix
class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex."""

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
        add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked
            and thus usable.
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and
            call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked
def __repr__(self):
    """Debug representation naming the class and the wrapped graph index."""
    return "%s(%r)" % (self.__class__.__name__, self._graph_index)
def add_records(self, records, random_id=False):  | 
2205  | 
"""Add multiple records to the index.  | 
|
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2206  | 
        
 | 
2207  | 
        This function does not insert data into the Immutable GraphIndex
 | 
|
2208  | 
        backing the KnitGraphIndex, instead it prepares data for insertion by
 | 
|
| 
2592.3.19
by Robert Collins
 Change KnitGraphIndex from returning data to performing a callback on insertions.  | 
2209  | 
        the caller and checks that it is safe to insert then calls
 | 
2210  | 
        self._add_callback with the prepared GraphIndex nodes.
 | 
|
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2211  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2212  | 
        :param records: a list of tuples:
 | 
2213  | 
                         (key, options, access_memo, parents).
 | 
|
| 
2841.2.1
by Robert Collins
 * Commit no longer checks for new text keys during insertion when the  | 
2214  | 
        :param random_id: If True the ids being added were randomly generated
 | 
2215  | 
            and no check for existence will be performed.
 | 
|
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2216  | 
        """
 | 
| 
2592.3.19
by Robert Collins
 Change KnitGraphIndex from returning data to performing a callback on insertions.  | 
2217  | 
if not self._add_callback:  | 
2218  | 
raise errors.ReadOnlyError(self)  | 
|
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2219  | 
        # we hope there are no repositories with inconsistent parentage
 | 
2220  | 
        # anymore.
 | 
|
2221  | 
||
2222  | 
keys = {}  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2223  | 
for (key, options, access_memo, parents) in records:  | 
2224  | 
if self._parents:  | 
|
2225  | 
parents = tuple(parents)  | 
|
| 
2670.2.2
by Robert Collins
 * In ``bzrlib.knit`` the internal interface has been altered to use  | 
2226  | 
index, pos, size = access_memo  | 
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2227  | 
if 'no-eol' in options:  | 
2228  | 
value = 'N'  | 
|
2229  | 
else:  | 
|
2230  | 
value = ' '  | 
|
2231  | 
value += "%d %d" % (pos, size)  | 
|
| 
2592.3.34
by Robert Collins
 Rough unfactored support for parentless KnitGraphIndexs.  | 
2232  | 
if not self._deltas:  | 
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2233  | 
if 'line-delta' in options:  | 
2234  | 
raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")  | 
|
| 
2592.3.34
by Robert Collins
 Rough unfactored support for parentless KnitGraphIndexs.  | 
2235  | 
if self._parents:  | 
2236  | 
if self._deltas:  | 
|
2237  | 
if 'line-delta' in options:  | 
|
| 
2624.2.5
by Robert Collins
 Change bzrlib.index.Index keys to be 1-tuples, not strings.  | 
2238  | 
node_refs = (parents, (parents[0],))  | 
| 
2592.3.34
by Robert Collins
 Rough unfactored support for parentless KnitGraphIndexs.  | 
2239  | 
else:  | 
| 
2624.2.5
by Robert Collins
 Change bzrlib.index.Index keys to be 1-tuples, not strings.  | 
2240  | 
node_refs = (parents, ())  | 
| 
2592.3.34
by Robert Collins
 Rough unfactored support for parentless KnitGraphIndexs.  | 
2241  | 
else:  | 
| 
2624.2.5
by Robert Collins
 Change bzrlib.index.Index keys to be 1-tuples, not strings.  | 
2242  | 
node_refs = (parents, )  | 
| 
2592.3.34
by Robert Collins
 Rough unfactored support for parentless KnitGraphIndexs.  | 
2243  | 
else:  | 
2244  | 
if parents:  | 
|
2245  | 
raise KnitCorrupt(self, "attempt to add node with parents "  | 
|
2246  | 
"in parentless index.")  | 
|
2247  | 
node_refs = ()  | 
|
| 
2624.2.5
by Robert Collins
 Change bzrlib.index.Index keys to be 1-tuples, not strings.  | 
2248  | 
keys[key] = (value, node_refs)  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2249  | 
        # check for dups
 | 
| 
2841.2.1
by Robert Collins
 * Commit no longer checks for new text keys during insertion when the  | 
2250  | 
if not random_id:  | 
2251  | 
present_nodes = self._get_entries(keys)  | 
|
2252  | 
for (index, key, value, node_refs) in present_nodes:  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2253  | 
if (value[0] != keys[key][0][0] or  | 
2254  | 
node_refs != keys[key][1]):  | 
|
2255  | 
raise KnitCorrupt(self, "inconsistent details in add_records"  | 
|
| 
2841.2.1
by Robert Collins
 * Commit no longer checks for new text keys during insertion when the  | 
2256  | 
": %s %s" % ((value, node_refs), keys[key]))  | 
2257  | 
del keys[key]  | 
|
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2258  | 
result = []  | 
| 
2592.3.34
by Robert Collins
 Rough unfactored support for parentless KnitGraphIndexs.  | 
2259  | 
if self._parents:  | 
2260  | 
for key, (value, node_refs) in keys.iteritems():  | 
|
2261  | 
result.append((key, value, node_refs))  | 
|
2262  | 
else:  | 
|
2263  | 
for key, (value, node_refs) in keys.iteritems():  | 
|
2264  | 
result.append((key, value))  | 
|
| 
2592.3.19
by Robert Collins
 Change KnitGraphIndex from returning data to performing a callback on insertions.  | 
2265  | 
self._add_callback(result)  | 
| 
2592.3.17
by Robert Collins
 Add add_version(s) to KnitGraphIndex, completing the required api for KnitVersionedFile.  | 
2266  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2267  | 
def _check_read(self):
    """Raise if reads are not permitted.

    :raises ObjectNotLocked: if the underlying object is not locked.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
def _check_write_ok(self):
    """Raise if writes are not permitted.

    :raises ObjectNotLocked: if the underlying object is not locked.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
def _compression_parent(self, an_entry):
    """Return the key an_entry is delta-compressed against, or None.

    :raises AssertionError: if the entry lists multiple compression
        parents.
    """
    # Grab the second parent list (as deltas implies parents currently).
    compression_parents = an_entry[3][1]
    if not compression_parents:
        return None
    if len(compression_parents) != 1:
        raise AssertionError(
            "Too many compression parents: %r" % compression_parents)
    return compression_parents[0]
def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for version_ids.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed
            to Factory.parse_record
    """
    self._check_read()
    result = {}
    for entry in self._get_entries(keys, False):
        key = entry[1]
        parents = entry[3][0] if self._parents else ()
        if self._deltas:
            compression_parent_key = self._compression_parent(entry)
        else:
            compression_parent_key = None
        # The node value starts with 'N' for records lacking a final EOL.
        noeol = (entry[2][0] == 'N')
        if compression_parent_key:
            method = 'line-delta'
        else:
            method = 'fulltext'
        result[key] = (self._node_to_position(entry),
                       compression_parent_key, parents,
                       (method, noeol))
    return result
def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    :param keys: An iterable of index key tuples.
    :param check_present: If True, raise RevisionNotPresent for any key
        the graph index does not yield.
    """
    keys = set(keys)
    seen = set()
    if self._parents:
        for node in self._graph_index.iter_entries(keys):
            yield node
            seen.add(node[1])
    else:
        # adapt parentless index to the rest of the code.
        for node in self._graph_index.iter_entries(keys):
            yield node[0], node[1], node[2], ()
            seen.add(node[1])
    if check_present:
        absent = keys.difference(seen)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
def get_method(self, key):
    """Return compression method of specified key."""
    node = self._get_node(key)
    return self._get_method(node)
def _get_method(self, node):
    """Return 'line-delta' or 'fulltext' for an index node."""
    if not self._deltas:
        # Non-delta indices store everything as fulltexts.
        return 'fulltext'
    if self._compression_parent(node):
        return 'line-delta'
    return 'fulltext'
def _get_node(self, key):
    """Return the single index entry for key.

    :raises RevisionNotPresent: if the key is not in the index.
    """
    try:
        return list(self._get_entries([key]))[0]
    except IndexError:
        # _get_entries yielded nothing: the key is absent.
        raise RevisionNotPresent(key, self)
def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']
    """
    node = self._get_node(key)
    options = [self._get_method(node)]
    # A leading 'N' in the node value marks a record with no final EOL.
    if node[2][0] == 'N':
        options.append('no-eol')
    return options
def get_parent_map(self, keys):  | 
|
2381  | 
"""Get a map of the parents of keys.  | 
|
2382  | 
||
2383  | 
        :param keys: The keys to look up parents for.
 | 
|
2384  | 
        :return: A mapping from keys to parents. Absent keys are absent from
 | 
|
2385  | 
            the mapping.
 | 
|
2386  | 
        """
 | 
|
2387  | 
self._check_read()  | 
|
2388  | 
nodes = self._get_entries(keys)  | 
|
2389  | 
result = {}  | 
|
2390  | 
if self._parents:  | 
|
2391  | 
for node in nodes:  | 
|
2392  | 
result[node[1]] = node[3][0]  | 
|
2393  | 
else:  | 
|
2394  | 
for node in nodes:  | 
|
2395  | 
result[node[1]] = None  | 
|
2396  | 
return result  | 
|
2397  | 
||
2398  | 
def get_position(self, key):  | 
|
2399  | 
"""Return details needed to access the version.  | 
|
2400  | 
        
 | 
|
2401  | 
        :return: a tuple (index, data position, size) to hand to the access
 | 
|
2402  | 
            logic to get the record.
 | 
|
2403  | 
        """
 | 
|
2404  | 
node = self._get_node(key)  | 
|
2405  | 
return self._node_to_position(node)  | 
|
2406  | 
||
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
2407  | 
    # Membership test: reuse the generic parent-map based implementation
    # shared by the index classes.
    has_key = _mod_index._has_key_from_parent_map
| 
3830.3.9
by Martin Pool
 Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests  | 
2408  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2409  | 
def keys(self):  | 
2410  | 
"""Get all the keys in the collection.  | 
|
2411  | 
        
 | 
|
2412  | 
        The keys are not ordered.
 | 
|
2413  | 
        """
 | 
|
2414  | 
self._check_read()  | 
|
2415  | 
return [node[1] for node in self._graph_index.iter_all_entries()]  | 
|
2416  | 
||
| 
3830.3.12
by Martin Pool
 Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks  | 
2417  | 
    # Absent-key computation: reuse the generic parent-map based
    # implementation shared by the index classes.
    missing_keys = _mod_index._missing_keys_from_parent_map
2418  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2419  | 
def _node_to_position(self, node):  | 
2420  | 
"""Convert an index value to position details."""  | 
|
2421  | 
bits = node[2][1:].split(' ')  | 
|
2422  | 
return node[0], int(bits[0]), int(bits[1])  | 
|
2423  | 
||
| 
3878.1.2
by John Arbash Meinel
 Move the sorting into each index, and customize it for Kndx access.  | 
2424  | 
def _sort_keys_by_io(self, keys, positions):  | 
2425  | 
"""Figure out an optimal order to read the records for the given keys.  | 
|
2426  | 
||
2427  | 
        Sort keys, grouped by index and sorted by position.
 | 
|
2428  | 
||
2429  | 
        :param keys: A list of keys whose records we want to read. This will be
 | 
|
2430  | 
            sorted 'in-place'.
 | 
|
2431  | 
        :param positions: A dict, such as the one returned by
 | 
|
2432  | 
            _get_components_positions()
 | 
|
2433  | 
        :return: None
 | 
|
2434  | 
        """
 | 
|
2435  | 
def get_index_memo(key):  | 
|
| 
3878.1.3
by John Arbash Meinel
 Add a comment about what data we are sorting by.  | 
2436  | 
            # index_memo is at offset [1]. It is made up of (GraphIndex,
 | 
2437  | 
            # position, size). GI is an object, which will be unique for each
 | 
|
2438  | 
            # pack file. This causes us to group by pack file, then sort by
 | 
|
2439  | 
            # position. Size doesn't matter, but it isn't worth breaking up the
 | 
|
2440  | 
            # tuple.
 | 
|
| 
3878.1.2
by John Arbash Meinel
 Move the sorting into each index, and customize it for Kndx access.  | 
2441  | 
return positions[key][1]  | 
2442  | 
return keys.sort(key=get_index_memo)  | 
|
2443  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2444  | 
|
2445  | 
class _KnitKeyAccess(object):  | 
|
2446  | 
"""Access to records in .knit files."""  | 
|
2447  | 
||
2448  | 
def __init__(self, transport, mapper):  | 
|
2449  | 
"""Create a _KnitKeyAccess with transport and mapper.  | 
|
2450  | 
||
2451  | 
        :param transport: The transport the access object is rooted at.
 | 
|
2452  | 
        :param mapper: The mapper used to map keys to .knit files.
 | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2453  | 
        """
 | 
2454  | 
self._transport = transport  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2455  | 
self._mapper = mapper  | 
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2456  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2457  | 
def add_raw_records(self, key_sizes, raw_data):  | 
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2458  | 
"""Add raw knit bytes to a storage area.  | 
2459  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2460  | 
        The data is spooled to the container writer in one bytes-record per
 | 
2461  | 
        raw data item.
 | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2462  | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2463  | 
        :param sizes: An iterable of tuples containing the key and size of each
 | 
2464  | 
            raw data segment.
 | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2465  | 
        :param raw_data: A bytestring containing the data.
 | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2466  | 
        :return: A list of memos to retrieve the record later. Each memo is an
 | 
2467  | 
            opaque index memo. For _KnitKeyAccess the memo is (key, pos,
 | 
|
2468  | 
            length), where the key is the record key.
 | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2469  | 
        """
 | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2470  | 
if type(raw_data) != str:  | 
2471  | 
raise AssertionError(  | 
|
2472  | 
'data must be plain bytes was %s' % type(raw_data))  | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2473  | 
result = []  | 
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2474  | 
offset = 0  | 
2475  | 
        # TODO: This can be tuned for writing to sftp and other servers where
 | 
|
2476  | 
        # append() is relatively expensive by grouping the writes to each key
 | 
|
2477  | 
        # prefix.
 | 
|
2478  | 
for key, size in key_sizes:  | 
|
2479  | 
path = self._mapper.map(key)  | 
|
2480  | 
try:  | 
|
2481  | 
base = self._transport.append_bytes(path + '.knit',  | 
|
2482  | 
raw_data[offset:offset+size])  | 
|
2483  | 
except errors.NoSuchFile:  | 
|
2484  | 
self._transport.mkdir(osutils.dirname(path))  | 
|
2485  | 
base = self._transport.append_bytes(path + '.knit',  | 
|
2486  | 
raw_data[offset:offset+size])  | 
|
2487  | 
            # if base == 0:
 | 
|
2488  | 
            # chmod.
 | 
|
2489  | 
offset += size  | 
|
2490  | 
result.append((key, base, size))  | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2491  | 
return result  | 
2492  | 
||
2493  | 
def get_raw_records(self, memos_for_retrieval):  | 
|
2494  | 
"""Get the raw bytes for a records.  | 
|
2495  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2496  | 
        :param memos_for_retrieval: An iterable containing the access memo for
 | 
2497  | 
            retrieving the bytes.
 | 
|
| 
2592.3.66
by Robert Collins
 Allow adaption of KnitData to pack files.  | 
2498  | 
        :return: An iterator over the bytes of the records.
 | 
2499  | 
        """
 | 
|
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2500  | 
        # first pass, group into same-index request to minimise readv's issued.
 | 
2501  | 
request_lists = []  | 
|
2502  | 
current_prefix = None  | 
|
2503  | 
for (key, offset, length) in memos_for_retrieval:  | 
|
2504  | 
if current_prefix == key[:-1]:  | 
|
2505  | 
current_list.append((offset, length))  | 
|
2506  | 
else:  | 
|
2507  | 
if current_prefix is not None:  | 
|
2508  | 
request_lists.append((current_prefix, current_list))  | 
|
2509  | 
current_prefix = key[:-1]  | 
|
2510  | 
current_list = [(offset, length)]  | 
|
2511  | 
        # handle the last entry
 | 
|
2512  | 
if current_prefix is not None:  | 
|
2513  | 
request_lists.append((current_prefix, current_list))  | 
|
2514  | 
for prefix, read_vector in request_lists:  | 
|
2515  | 
path = self._mapper.map(prefix) + '.knit'  | 
|
2516  | 
for pos, data in self._transport.readv(path, read_vector):  | 
|
2517  | 
yield data  | 
|
2518  | 
||
2519  | 
||
2520  | 
class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation."""

    def __init__(self, index_to_packs, reload_func=None):
        """Create a _DirectPackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param reload_func: A function to call if we determine that the pack
            files have moved and we need to reload our caches. See
            bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.
        """
        # The writer and write index are unset until set_writer() is called.
        self._container_writer = None
        self._write_index = None
        self._indices = index_to_packs
        self._reload_func = reload_func

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param key_sizes: An iterable of tuples containing the key and size
            of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _DirectPackAccess the memo is (index, pos,
            length), where the index field is the write_index object supplied
            to the PackAccess object.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        result = []
        offset = 0
        for key, size in key_sizes:
            # Spool each raw segment into the container writer; the writer
            # reports where the record landed inside the pack.
            p_offset, p_length = self._container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self._write_index, p_offset, p_length))
        return result

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                # Same pack as the previous memo: extend its readv list.
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            try:
                transport, path = self._indices[index]
            except KeyError:
                # A KeyError here indicates that someone has triggered an index
                # reload, and this index has gone missing, we need to start
                # over.
                if self._reload_func is None:
                    # If we don't have a _reload_func there is nothing that can
                    # be done
                    raise
                raise errors.RetryWithNewPacks(index,
                    reload_occurred=True,
                    exc_info=sys.exc_info())
            try:
                reader = pack.make_readv_reader(transport, path, offsets)
                for names, read_func in reader.iter_records():
                    yield read_func(None)
            except errors.NoSuchFile:
                # A NoSuchFile error indicates that a pack file has gone
                # missing on disk, we need to trigger a reload, and start over.
                if self._reload_func is None:
                    raise
                raise errors.RetryWithNewPacks(transport.abspath(path),
                    reload_occurred=False,
                    exc_info=sys.exc_info())

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data."""
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index

    def reload_or_raise(self, retry_exc):
        """Try calling the reload function, or re-raise the original exception.

        This should be called after _DirectPackAccess raises a
        RetryWithNewPacks exception. This function will handle the common logic
        of determining when the error is fatal versus being temporary.
        It will also make sure that the original exception is raised, rather
        than the RetryWithNewPacks exception.

        If this function returns, then the calling function should retry
        whatever operation was being performed. Otherwise an exception will
        be raised.

        :param retry_exc: A RetryWithNewPacks exception.
        """
        is_error = False
        if self._reload_func is None:
            is_error = True
        elif not self._reload_func():
            # The reload claimed that nothing changed
            if not retry_exc.reload_occurred:
                # If there wasn't an earlier reload, then we really were
                # expecting to find changes. We didn't find them, so this is a
                # hard error
                is_error = True
        if is_error:
            # NOTE: Python 2 three-expression raise: re-raises the original
            # exception with its original traceback preserved.
            exc_class, exc_value, exc_traceback = retry_exc.exc_info
            raise exc_class, exc_value, exc_traceback
| 
3789.2.5
by John Arbash Meinel
 Change _DirectPackAccess to only raise Retry when _reload_func is defined.  | 
2648  | 
|
| 
1684.3.3
by Robert Collins
 Add a special cased weaves to knit converter.  | 
2649  | 
|
| 
2781.1.1
by Martin Pool
 merge cpatiencediff from Lukas  | 
2650  | 
# Deprecated compatibility alias: use PatienceSequenceMatcher directly instead.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher
|
| 
2484.1.1
by John Arbash Meinel
 Add an initial function to read knit indexes in pyrex.  | 
2652  | 
|
2653  | 
||
| 
2770.1.2
by Aaron Bentley
 Convert to knit-only annotation  | 
2654  | 
def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.
    """
    return iter(_KnitAnnotator(knit).annotate(revision_id))
| 
3224.1.7
by John Arbash Meinel
 _StreamIndex also needs to return the proper values for get_build_details.  | 
2663  | 
|
2664  | 
||
2665  | 
class _KnitAnnotator(object):  | 
|
| 
3224.1.5
by John Arbash Meinel
 Start using a helper class for doing the knit-pack annotations.  | 
2666  | 
"""Build up the annotations for a text."""  | 
2667  | 
||
2668  | 
def __init__(self, knit):  | 
|
2669  | 
self._knit = knit  | 
|
2670  | 
||
| 
3224.1.6
by John Arbash Meinel
 Refactor the annotation logic into a helper class.  | 
2671  | 
        # Content objects, differs from fulltexts because of how final newlines
 | 
2672  | 
        # are treated by knits. the content objects here will always have a
 | 
|
2673  | 
        # final newline
 | 
|
2674  | 
self._fulltext_contents = {}  | 
|
2675  | 
||
2676  | 
        # Annotated lines of specific revisions
 | 
|
2677  | 
self._annotated_lines = {}  | 
|
2678  | 
||
2679  | 
        # Track the raw data for nodes that we could not process yet.
 | 
|
2680  | 
        # This maps the revision_id of the base to a list of children that will
 | 
|
2681  | 
        # annotated from it.
 | 
|
2682  | 
self._pending_children = {}  | 
|
2683  | 
||
| 
3224.1.29
by John Arbash Meinel
 Properly handle annotating when ghosts are present.  | 
2684  | 
        # Nodes which cannot be extracted
 | 
2685  | 
self._ghosts = set()  | 
|
2686  | 
||
| 
3224.1.19
by John Arbash Meinel
 Work on removing nodes from the working set once they aren't needed.  | 
2687  | 
        # Track how many children this node has, so we know if we need to keep
 | 
2688  | 
        # it
 | 
|
2689  | 
self._annotate_children = {}  | 
|
2690  | 
self._compression_children = {}  | 
|
2691  | 
||
| 
3224.1.6
by John Arbash Meinel
 Refactor the annotation logic into a helper class.  | 
2692  | 
self._all_build_details = {}  | 
| 
3224.1.10
by John Arbash Meinel
 Introduce the heads_provider for reannotate.  | 
2693  | 
        # The children => parent revision_id graph
 | 
| 
3224.1.6
by John Arbash Meinel
 Refactor the annotation logic into a helper class.  | 
2694  | 
self._revision_id_graph = {}  | 
2695  | 
||
| 
3224.1.10
by John Arbash Meinel
 Introduce the heads_provider for reannotate.  | 
2696  | 
self._heads_provider = None  | 
2697  | 
||
| 
3224.1.19
by John Arbash Meinel
 Work on removing nodes from the working set once they aren't needed.  | 
2698  | 
self._nodes_to_keep_annotations = set()  | 
| 
3224.1.22
by John Arbash Meinel
 Cleanup the extra debugging info, and some >80 char lines.  | 
2699  | 
self._generations_until_keep = 100  | 
2700  | 
||
2701  | 
    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value
|
| 
3224.1.19
by John Arbash Meinel
 Work on removing nodes from the working set once they aren't needed.  | 
2708  | 
|
| 
3224.1.15
by John Arbash Meinel
 Finish removing method and noeol from general knowledge,  | 
2709  | 
def _add_fulltext_content(self, revision_id, content_obj):  | 
| 
3224.1.6
by John Arbash Meinel
 Refactor the annotation logic into a helper class.  | 
2710  | 
self._fulltext_contents[revision_id] = content_obj  | 
| 
3224.1.19
by John Arbash Meinel
 Work on removing nodes from the working set once they aren't needed.  | 
2711  | 
        # TODO: jam 20080305 It might be good to check the sha1digest here
 | 
| 
3224.1.22
by John Arbash Meinel
 Cleanup the extra debugging info, and some >80 char lines.  | 
2712  | 
return content_obj.text()  | 
| 
3224.1.6
by John Arbash Meinel
 Refactor the annotation logic into a helper class.  | 
2713  | 
|
2714  | 
def _check_parents(self, child, nodes_to_annotate):  | 
|
2715  | 
"""Check if all parents have been processed.  | 
|
2716  | 
||
2717  | 
        :param child: A tuple of (rev_id, parents, raw_content)
 | 
|
2718  | 
        :param nodes_to_annotate: If child is ready, add it to
 | 
|
2719  | 
            nodes_to_annotate, otherwise put it back in self._pending_children
 | 
|
2720  | 
        """
 | 
|
2721  | 
for parent_id in child[1]:  | 
|
| 
3224.1.29
by John Arbash Meinel
 Properly handle annotating when ghosts are present.  | 
2722  | 
if (parent_id not in self._annotated_lines):  | 
| 
3224.1.6
by John Arbash Meinel
 Refactor the annotation logic into a helper class.  | 
2723  | 
                # This parent is present, but another parent is missing
 | 
2724  | 
self._pending_children.setdefault(parent_id,  | 
|
2725  | 
[]).append(child)  | 
|
2726  | 
                break
 | 
|
2727  | 
else:  | 
|
2728  | 
            # This one is ready to be processed
 | 
|
2729  | 
nodes_to_annotate.append(child)  | 
|
2730  | 
||
2731  | 
    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :param revision_id: The revision being annotated.
        :param fulltext: The text lines of the revision.
        :param parent_ids: The parents whose annotated lines are reused.
        :param left_matching_blocks: Optional pre-computed matching blocks
            against the left-hand parent, forwarded to reannotate.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
                    fulltext, revision_id, left_matching_blocks,
                    heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            # One fewer child is waiting on parent p now.
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                and p not in self._nodes_to_keep_annotations):
                # No remaining children and not pinned: drop the cached state
                # for p to bound memory use.
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate
|
2759  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2760  | 
    def _get_build_graph(self, key):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate the
        fulltext.)

        :param key: The key whose build/annotation ancestry should be walked.
        :return: A list of (key, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        :raises KnitCorrupt: if any node has a ghost as its compression parent.
        """
        if key in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([key])
        records = []
        # Generation counters drive the heuristic below that periodically
        # pins a merge node's annotations in memory (_nodes_to_keep_annotations)
        # so they are not discarded by _add_annotation's cleanup.
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            # NOTE(review): this loop variable shadows the 'key' parameter;
            # after the while loop 'key' is no longer the requested key.
            for key, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[key] = parents
                records.append((key, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                                 if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(key)
                if parents:
                    for parent in parents:
                        self._annotate_children.setdefault(parent,
                            []).append(key)
                    num_gens = generation - kept_generation
                    if ((num_gens >= self._generations_until_keep)
                        and len(parents) > 1):
                        # Enough generations have passed since we last kept a
                        # node; keep this merge node's annotations around.
                        kept_generation = generation
                        self._nodes_to_keep_annotations.add(key)

            # Keys requested this iteration but absent from the index are
            # ghosts: record them with no parents and stop searching for them.
            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        if self._ghosts.intersection(self._compression_children):
            raise KnitCorrupt(
                "We cannot have nodes which have a ghost compression parent:\n"
                "ghosts: %r\n"
                "compression children: %r"
                % (self._ghosts, self._compression_children))
        # Cleanout anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records
|
2830  | 
||
2831  | 
    def _annotate_records(self, records):
        """Build the annotations for the listed records.

        :param records: A list of (key, index_memo) pairs, as returned by
            _get_build_graph, to be read via self._knit._read_records_iter.
        :raises AssertionError: if a record's compression parent does not list
            it among its compression children.
        """
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._read_records_iter(records):
            if rev_id in self._annotated_lines:
                # Already annotated on a previous pass.
                continue
            parent_ids = self._revision_id_graph[rev_id]
            # Ghost parents cannot contribute annotations; drop them.
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents, and
            #       parent_ids, we should be able to do this without assuming
            #       the build order
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                fulltext_content, delta = self._knit._factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            # Drain the work stack: each annotated node may unblock children
            # that were waiting on it (via _add_annotation's return value).
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                blocks = None
                if compression_parent is not None:
                    comp_children = self._compression_children[compression_parent]
                    if rev_id not in comp_children:
                        raise AssertionError("%r not in compression children %r"
                            % (rev_id, comp_children))
                    # If there is only 1 child, it is safe to reuse this
                    # content
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit._factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    if compression_parent == parent_ids[0]:
                        # the compression_parent is the left parent, so we can
                        # re-use the delta
                        blocks = KnitContent.get_line_delta_blocks(delta,
                                parent_fulltext, fulltext)
                else:
                    # Stored as a fulltext; no delta to reuse.
                    fulltext_content = self._knit._factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content)
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                         left_matching_blocks=blocks))
|
2907  | 
||
| 
3224.1.10
by John Arbash Meinel
 Introduce the heads_provider for reannotate.  | 
2908  | 
def _get_heads_provider(self):  | 
2909  | 
"""Create a heads provider for resolving ancestry issues."""  | 
|
2910  | 
if self._heads_provider is not None:  | 
|
2911  | 
return self._heads_provider  | 
|
2912  | 
parent_provider = _mod_graph.DictParentsProvider(  | 
|
2913  | 
self._revision_id_graph)  | 
|
2914  | 
graph_obj = _mod_graph.Graph(parent_provider)  | 
|
| 
3224.1.20
by John Arbash Meinel
 Reduce the number of cache misses by caching known heads answers  | 
2915  | 
head_cache = _mod_graph.FrozenHeadsCache(graph_obj)  | 
| 
3224.1.10
by John Arbash Meinel
 Introduce the heads_provider for reannotate.  | 
2916  | 
self._heads_provider = head_cache  | 
2917  | 
return head_cache  | 
|
2918  | 
||
| 
3350.6.4
by Robert Collins
 First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.  | 
2919  | 
    def annotate(self, key):
        """Return the annotated fulltext at the given key.

        :param key: The key to annotate.
        :return: The annotated lines stored for ``key``
            (self._annotated_lines[key]).
        :raises errors.RevisionNotPresent: if ``key`` turns out to be a ghost.
        """
        if len(self._knit._fallback_vfs) > 0:
            # stacked knits can't use the fast path at present.
            return self._simple_annotate(key)
        # Retry loop: if the pack files are repacked underneath us, the index
        # raises RetryWithNewPacks; reload and start over.
        while True:
            try:
                records = self._get_build_graph(key)
                if key in self._ghosts:
                    raise errors.RevisionNotPresent(key, self._knit)
                self._annotate_records(records)
                return self._annotated_lines[key]
            except errors.RetryWithNewPacks, e:
                # reload_or_raise re-raises if reloading cannot help.
                self._knit._access.reload_or_raise(e)
                # The cached build_details are no longer valid
                self._all_build_details.clear()
|
| 
3224.1.5
by John Arbash Meinel
 Start using a helper class for doing the knit-pack annotations.  | 
2938  | 
|
| 
3517.4.1
by Martin Pool
 Merge unoptimized annotate code for stacking, and only use it when needed  | 
2939  | 
    def _simple_annotate(self, key):
        """Return annotated fulltext, rediffing from the full texts.

        This is slow but makes no assumptions about the repository
        being able to produce line deltas.

        :param key: The key to annotate.
        :raises errors.RevisionNotPresent: if no annotation could be produced
            for ``key``.
        """
        # TODO: this code generates a parent maps of present ancestors; it
        # could be split out into a separate method, and probably should use
        # iter_ancestry instead. -- mbp and robertc 20080704
        graph = _mod_graph.Graph(self._knit)
        head_cache = _mod_graph.FrozenHeadsCache(graph)
        search = graph._make_breadth_first_searcher([key])
        keys = set()
        # Collect every present ancestor of key (ghosts are dropped).
        while True:
            try:
                present, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(present)
        parent_map = self._knit.get_parent_map(keys)
        parent_cache = {}
        reannotate = annotate.reannotate
        # NOTE(review): the loop below rebinds 'key' to each streamed record's
        # key; the final 'return parent_cache[key]' relies on the requested
        # key being the last record in 'topological' order — verify that
        # assumption holds when the requested key is absent from the stream.
        for record in self._knit.get_record_stream(keys, 'topological', True):
            key = record.key
            fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            parents = parent_map[key]
            if parents is not None:
                # Parents come earlier in topological order, so their
                # annotations are already in parent_cache.
                parent_lines = [parent_cache[parent] for parent in parent_map[key]]
            else:
                parent_lines = []
            parent_cache[key] = list(
                reannotate(parent_lines, fulltext, key, None, head_cache))
        try:
            return parent_cache[key]
        except KeyError, e:
            raise errors.RevisionNotPresent(key, self._knit)
|
| 
3224.1.5
by John Arbash Meinel
 Start using a helper class for doing the knit-pack annotations.  | 
2975  | 
|
2976  | 
||
| 
2484.1.1
by John Arbash Meinel
 Add an initial function to read knit indexes in pyrex.  | 
2977  | 
# Prefer the compiled (pyrex/C) knit-index parser when it is available; fall
# back to the pure-Python implementation otherwise.  Both expose the same
# _load_data callable.
try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data