/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3735.2.1 by Robert Collins
Add the concept of CHK lookups to Repository.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3735.2.1 by Robert Collins
Add the concept of CHK lookups to Repository.
16
17
"""Tests for repositories that support CHK indices."""
18
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
19
from breezy import (
4634.35.10 by Andrew Bennetts
Move tests to per_repository_chk.
20
    errors,
21
    osutils,
4634.71.1 by John Arbash Meinel
Work around bug #402623 by allowing BTreeGraphIndex(...,unlimited_cache=True).
22
    repository,
4634.35.10 by Andrew Bennetts
Move tests to per_repository_chk.
23
    )
6670.4.3 by Jelmer Vernooij
Fix more imports.
24
from breezy.bzr import (
25
    btree_index,
26
    )
27
from breezy.bzr.versionedfile import VersionedFiles
6670.4.14 by Jelmer Vernooij
Move remote to breezy.bzr.
28
from breezy.bzr.remote import RemoteRepository
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
29
from breezy.tests import TestNotApplicable
30
from breezy.tests.per_repository_chk import TestCaseWithRepositoryCHK
3735.2.4 by Robert Collins
Test RemoteRepository with and with-out chk index backing formats.
31
32
33
class TestCHKSupport(TestCaseWithRepositoryCHK):
    """Tests of the ``chk_bytes`` store exposed by CHK-capable repositories."""

    def test_chk_bytes_attribute_is_VersionedFiles(self):
        """``repo.chk_bytes`` is a ``VersionedFiles`` instance."""
        repo = self.make_repository('.')
        self.assertIsInstance(repo.chk_bytes, VersionedFiles)

    def test_add_bytes_to_chk_bytes_store(self):
        """Lines added to ``chk_bytes`` are keyed by their sha1.

        The key must be visible inside the write group, after an
        unlock/lock pair, and after reopening the repository.
        """
        expected_sha1 = '4e48e2c9a3d2ca8a708cb0cc545700544efb5021'
        expected_keys = {('sha1:' + expected_sha1,)}
        repo = self.make_repository('.')
        repo.lock_write()
        try:
            repo.start_write_group()
            try:
                # Only the sha1 is checked; the remaining two return values
                # are discarded (and must not shadow the ``len`` builtin).
                sha1, _, _ = repo.chk_bytes.add_lines((None,),
                    None, ["foo\n", "bar\n"], random_id=True)
                self.assertEqual(expected_sha1, sha1)
                self.assertEqual(expected_keys, repo.chk_bytes.keys())
            except:
                # Any failure (including a failed assertion) must abort the
                # write group before propagating.
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
        finally:
            repo.unlock()
        # And after an unlock/lock pair
        repo.lock_read()
        try:
            self.assertEqual(expected_keys, repo.chk_bytes.keys())
        finally:
            repo.unlock()
        # and reopening
        repo = repo.controldir.open_repository()
        repo.lock_read()
        try:
            self.assertEqual(expected_keys, repo.chk_bytes.keys())
        finally:
            repo.unlock()

    def test_pack_preserves_chk_bytes_store(self):
        """``pack()`` keeps chk records committed in separate write groups.

        An internal node and the leaf it points at are added in two
        different write groups; both keys must still be present after
        packing and after reopening the repository.
        """
        leaf_lines = ["chkleaf:\n", "0\n", "1\n", "0\n", "\n"]
        leaf_sha1 = osutils.sha_strings(leaf_lines)
        node_lines = ["chknode:\n", "0\n", "1\n", "1\n", "foo\n",
                      "\x00sha1:%s\n" % (leaf_sha1,)]
        node_sha1 = osutils.sha_strings(node_lines)
        expected_set = {('sha1:' + leaf_sha1,), ('sha1:' + node_sha1,)}
        repo = self.make_repository('.')
        repo.lock_write()
        try:
            repo.start_write_group()
            try:
                # Internal node pointing at a leaf.
                repo.chk_bytes.add_lines(
                    (None,), None, node_lines, random_id=True)
            except:
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
            repo.start_write_group()
            try:
                # Leaf in a separate pack.
                repo.chk_bytes.add_lines(
                    (None,), None, leaf_lines, random_id=True)
            except:
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
            repo.pack()
            self.assertEqual(expected_set, repo.chk_bytes.keys())
        finally:
            repo.unlock()
        # and reopening
        repo = repo.controldir.open_repository()
        repo.lock_read()
        try:
            self.assertEqual(expected_set, repo.chk_bytes.keys())
        finally:
            repo.unlock()

    def test_chk_bytes_are_fully_buffered(self):
        """The chk index uses a plain ``dict`` as its leaf node cache.

        Checked both on the repository that wrote the data and on a freshly
        opened one.
        """
        repo = self.make_repository('.')
        repo.lock_write()
        self.addCleanup(repo.unlock)
        repo.start_write_group()
        try:
            # Only the sha1 is checked; the remaining two return values are
            # discarded (and must not shadow the ``len`` builtin).
            sha1, _, _ = repo.chk_bytes.add_lines((None,),
                None, ["foo\n", "bar\n"], random_id=True)
            self.assertEqual('4e48e2c9a3d2ca8a708cb0cc545700544efb5021',
                sha1)
            self.assertEqual(
                {('sha1:4e48e2c9a3d2ca8a708cb0cc545700544efb5021',)},
                repo.chk_bytes.keys())
        except:
            repo.abort_write_group()
            raise
        else:
            repo.commit_write_group()
        # This may not always be correct if we change away from BTreeGraphIndex
        # in the future. But for now, lets check that chk_bytes are fully
        # buffered
        index = repo.chk_bytes._index._graph_index._indices[0]
        self.assertIsInstance(index, btree_index.BTreeGraphIndex)
        # Exact type check on purpose: the cache must be a plain dict, not
        # merely something dict-like.
        self.assertIs(type(index._leaf_node_cache), dict)
        # Re-opening the repository should also have a repo with everything
        # fully buffered
        repo2 = repository.Repository.open(self.get_url())
        repo2.lock_read()
        self.addCleanup(repo2.unlock)
        index = repo2.chk_bytes._index._graph_index._indices[0]
        self.assertIsInstance(index, btree_index.BTreeGraphIndex)
        self.assertIs(type(index._leaf_node_cache), dict)
149
4634.35.10 by Andrew Bennetts
Move tests to per_repository_chk.
150
151
class TestCommitWriteGroupIntegrityCheck(TestCaseWithRepositoryCHK):
    """Tests that commit_write_group prevents various kinds of invalid data
    from being committed to a CHK repository.
    """

    def reopen_repo_and_resume_write_group(self, repo):
        """Suspend repo's write group and resume it in a reopened repository.

        ``repo`` is unlocked after its write group is suspended.  The
        repository is then reopened through ``repo.controldir``, write-locked
        (with the unlock registered as a cleanup) and the suspended write
        group is resumed on it.

        :param repo: a write-locked repository with an active write group.
        :return: the reopened repository, with the write group resumed.
        """
        resume_tokens = repo.suspend_write_group()
        repo.unlock()
        reopened_repo = repo.controldir.open_repository()
        reopened_repo.lock_write()
        self.addCleanup(reopened_repo.unlock)
        reopened_repo.resume_write_group(resume_tokens)
        return reopened_repo

    def test_missing_chk_root_for_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a new inventory is missing.
        """
        repo = self.make_repository('damaged-repo')
        builder = self.make_branch_builder('simple-branch')
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None)),
            ('add', ('file', 'file-id', 'file', 'content\n'))])
        b = builder.get_branch()
        b.lock_read()
        self.addCleanup(b.unlock)
        repo.lock_write()
        repo.start_write_group()
        # Now, add the objects manually
        text_keys = [('file-id', 'A-id'), ('root-id', 'A-id')]
        # Directly add the texts, inventory, and revision object for 'A-id' --
        # but don't add the chk_bytes.
        src_repo = b.repository
        repo.texts.insert_record_stream(src_repo.texts.get_record_stream(
            text_keys, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('A-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('A-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_root_for_unchanged_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a new inventory is missing, even if the parent inventory is present
        and has identical content (i.e. the same chk root).

        A stacked repository containing only a revision with an identical
        inventory to its parent will still have the chk root records for those
        inventories.

        (In principle the chk records are unnecessary in this case, but in
        practice bzr 2.0rc1 (at least) expects to find them.)
        """
        repo = self.make_repository('damaged-repo')
        # Make a branch where the last two revisions have identical
        # inventories.
        builder = self.make_branch_builder('simple-branch')
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None)),
            ('add', ('file', 'file-id', 'file', 'content\n'))])
        builder.build_snapshot('B-id', None, [])
        builder.build_snapshot('C-id', None, [])
        b = builder.get_branch()
        b.lock_read()
        self.addCleanup(b.unlock)
        # check our setup: B-id and C-id should have identical chk root keys.
        inv_b = b.repository.get_inventory('B-id')
        inv_c = b.repository.get_inventory('C-id')
        if not isinstance(repo, RemoteRepository):
            # Remote repositories always return plain inventories
            self.assertEqual(inv_b.id_to_entry.key(), inv_c.id_to_entry.key())
        # Now, manually insert objects for a stacked repo with only revision
        # C-id:
        # We need ('revisions', 'C-id'), ('inventories', 'C-id'),
        # ('inventories', 'B-id'), and the corresponding chk roots for those
        # inventories.
        repo.lock_write()
        repo.start_write_group()
        src_repo = b.repository
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_leaf_for_inventory(self):
        """commit_write_group fails with BzrCheckError when a non-root chk
        record for the inserted inventories is missing.
        """
        repo = self.make_repository('damaged-repo')
        if isinstance(repo, RemoteRepository):
            raise TestNotApplicable(
                "Unable to obtain CHKInventory from remote repo")
        b = self.make_branch_with_multiple_chk_nodes()
        src_repo = b.repository
        src_repo.lock_read()
        self.addCleanup(src_repo.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* drop the non-root chk records.
        inv_b = src_repo.get_inventory('B-id')
        inv_c = src_repo.get_inventory('C-id')
        chk_root_keys_only = [
            inv_b.id_to_entry.key(), inv_b.parent_id_basename_to_file_id.key(),
            inv_c.id_to_entry.key(), inv_c.parent_id_basename_to_file_id.key()]
        all_chks = src_repo.chk_bytes.keys()
        # Pick a non-root key to drop
        key_to_drop = all_chks.difference(chk_root_keys_only).pop()
        all_chks.discard(key_to_drop)
        repo.lock_write()
        repo.start_write_group()
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                all_chks, 'unordered', True))
        repo.texts.insert_record_stream(
            src_repo.texts.get_record_stream(
                src_repo.texts.keys(), 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_root_for_parent_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a parent inventory of a new revision is missing.
        """
        repo = self.make_repository('damaged-repo')
        if isinstance(repo, RemoteRepository):
            raise TestNotApplicable(
                "Unable to obtain CHKInventory from remote repo")
        b = self.make_branch_with_multiple_chk_nodes()
        b.lock_read()
        self.addCleanup(b.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* the chk root entry for the parent inventory.
        # We need ('revisions', 'C-id'), ('inventories', 'C-id'),
        # ('inventories', 'B-id'), and the corresponding chk roots for those
        # inventories.
        inv_c = b.repository.get_inventory('C-id')
        chk_keys_for_c_only = [
            inv_c.id_to_entry.key(), inv_c.parent_id_basename_to_file_id.key()]
        repo.lock_write()
        repo.start_write_group()
        src_repo = b.repository
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                chk_keys_for_c_only, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def make_branch_with_multiple_chk_nodes(self):
        """Build a three-revision branch (A-id, B-id, C-id) with long file ids.

        A-id adds the root and three files, B-id changes nothing and C-id
        modifies the three files.

        :return: the branch produced by the branch builder.
        """
        # add and modify files with very long file-ids, so that the chk map
        # will need more than just a root node.
        builder = self.make_branch_builder('simple-branch')
        file_adds = []
        file_modifies = []
        for char in 'abc':
            name = char * 10000
            file_adds.append(
                ('add', ('file-' + name, 'file-%s-id' % name, 'file',
                         'content %s\n' % name)))
            file_modifies.append(
                ('modify', ('file-%s-id' % name, 'new content %s\n' % name)))
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None))] +
            file_adds)
        builder.build_snapshot('B-id', None, [])
        builder.build_snapshot('C-id', None, file_modifies)
        return builder.get_branch()

    def test_missing_text_record(self):
        """commit_write_group fails with BzrCheckError when a text is missing.
        """
        repo = self.make_repository('damaged-repo')
        b = self.make_branch_with_multiple_chk_nodes()
        src_repo = b.repository
        src_repo.lock_read()
        self.addCleanup(src_repo.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* drop one changed text.
        all_texts = src_repo.texts.keys()
        all_texts.remove(('file-%s-id' % ('c'*10000,), 'C-id'))
        repo.lock_write()
        repo.start_write_group()
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                src_repo.chk_bytes.keys(), 'unordered', True))
        repo.texts.insert_record_stream(
            src_repo.texts.get_record_stream(
                all_texts, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()
394
395
396
397