/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to repofmt.py

  • Committer: John Arbash Meinel
  • Date: 2009-02-27 05:15:20 UTC
  • mto: (0.23.4 groupcompress_rabin)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090227051520-3bqqcchl92qup96h
Try even harder, now with even *more* streams.
The compressed size drops by another 4x.
Turn the data for each *layer* into a different stream.
With this change, the compressed inventory for gc255 drops to 1.5MB,
which is finally *smaller* than the source 'knit' format.

Show diffs side-by-side

added added

removed removed

Lines of Context:
290
290
                next_keys = set()
291
291
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
292
292
                                                     True)
293
 
                for record in stream:
294
 
                    bytes = record.get_bytes_as('fulltext')
295
 
                    # We don't care about search_key_func for this code,
296
 
                    # because we only care about external references.
297
 
                    node = chk_map._deserialise(bytes, record.key,
298
 
                                                search_key_func=None)
299
 
                    common_base = node._search_prefix
300
 
                    if isinstance(node, chk_map.InternalNode):
301
 
                        for prefix, value in node._items.iteritems():
302
 
                            assert isinstance(value, tuple)
303
 
                            if value not in next_keys:
304
 
                                keys_by_search_prefix.setdefault(prefix,
305
 
                                    []).append(value)
306
 
                                next_keys.add(value)
307
 
                    counter[0] += 1
308
 
                    if pb is not None:
309
 
                        pb.update('chk node', counter[0])
310
 
                    yield record
 
293
                def next_stream():
 
294
                    for record in stream:
 
295
                        bytes = record.get_bytes_as('fulltext')
 
296
                        # We don't care about search_key_func for this code,
 
297
                        # because we only care about external references.
 
298
                        node = chk_map._deserialise(bytes, record.key,
 
299
                                                    search_key_func=None)
 
300
                        common_base = node._search_prefix
 
301
                        if isinstance(node, chk_map.InternalNode):
 
302
                            for prefix, value in node._items.iteritems():
 
303
                                assert isinstance(value, tuple)
 
304
                                if value not in next_keys:
 
305
                                    keys_by_search_prefix.setdefault(prefix,
 
306
                                        []).append(value)
 
307
                                    next_keys.add(value)
 
308
                        counter[0] += 1
 
309
                        if pb is not None:
 
310
                            pb.update('chk node', counter[0])
 
311
                        yield record
 
312
                yield next_stream()
311
313
                # Double check that we won't be emitting any keys twice
312
314
                next_keys = next_keys.intersection(remaining_keys)
313
315
                cur_keys = []
314
316
                for prefix in sorted(keys_by_search_prefix):
315
317
                    cur_keys.extend(keys_by_search_prefix[prefix])
316
 
        yield _get_referenced_stream(id_roots)
317
 
        yield _get_referenced_stream(p_id_roots)
 
318
        for stream in _get_referenced_stream(id_roots):
 
319
            yield stream
 
320
        for stream in _get_referenced_stream(p_id_roots):
 
321
            yield stream
318
322
        if remaining_keys:
319
323
            trace.note('There were %d keys in the chk index, which'
320
324
                       ' were not referenced from inventories',