/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.211.9 by James Westby
Add some basic pack handling code.
1
# pack.py -- For dealing with packed git objects.
2
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
0.211.20 by Jelmer Vernooij
Change project name to dulwich everywhere, add assertion.
3
# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
0.211.9 by James Westby
Add some basic pack handling code.
4
# The code is loosely based on that in the sha1_file.c file from git itself,
5
# which is Copyright (C) Linus Torvalds, 2005 and distributed under the
6
# GPL version 2.
7
# 
8
# This program is free software; you can redistribute it and/or
9
# modify it under the terms of the GNU General Public License
10
# as published by the Free Software Foundation; version 2
11
# of the License.
12
# 
13
# This program is distributed in the hope that it will be useful,
14
# but WITHOUT ANY WARRANTY; without even the implied warranty of
15
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
# GNU General Public License for more details.
17
# 
18
# You should have received a copy of the GNU General Public License
19
# along with this program; if not, write to the Free Software
20
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
# MA  02110-1301, USA.
22
23
"""Classes for dealing with packed git objects.
24
25
A pack is a compact representation of a bunch of objects, stored
26
using deltas where possible.
27
28
They have two parts, the pack file, which stores the data, and an index
29
that tells you where the data is.
30
31
To find an object you look in all of the index files 'til you find a
32
match for the object name. You then use the pointer got from this as
33
a pointer in to the corresponding packfile.
34
"""
35
0.211.26 by Jelmer Vernooij
Write fan out table correctly.
36
from collections import defaultdict
0.211.23 by Jelmer Vernooij
Add functions for checking checksums of index files.
37
import hashlib
0.211.9 by James Westby
Add some basic pack handling code.
38
import mmap
39
import os
0.211.21 by Jelmer Vernooij
Read pack header.
40
import struct
0.211.17 by Jelmer Vernooij
Add simple mmap wrapper.
41
import sys
42
43
# mmap.mmap() accepts an ``offset`` argument on Python 2.6 and every later
# release (including all of Python 3).
supports_mmap_offset = sys.version_info >= (2, 6)
0.211.9 by James Westby
Add some basic pack handling code.
45
46
from objects import (ShaFile,
47
                     _decompress,
48
                     )
49
0.211.24 by Jelmer Vernooij
Add iterentries().
50
def hex_to_sha(hex):
    """Convert a hexadecimal object name into its raw binary form.

    :param hex: Hex string with an even number of digits (40 for a SHA-1).
    :return: Byte string holding one byte per pair of hex digits.
    :raise ValueError: If ``hex`` has an odd length or contains characters
        that are not hexadecimal digits.
    """
    import binascii
    try:
        return binascii.unhexlify(hex)
    except (TypeError, binascii.Error) as e:
        # Python 2 reports malformed input as TypeError, Python 3 as
        # binascii.Error; present a single exception type to callers.
        raise ValueError("Invalid hex sha %r: %s" % (hex, e))
0.211.9 by James Westby
Add some basic pack handling code.
55
0.211.35 by Jelmer Vernooij
Add simple pack dump utility.
56
def sha_to_hex(sha):
    """Convert a raw binary object name into its hexadecimal form.

    :param sha: Byte string (20 bytes for a SHA-1).
    :return: Native string with two lowercase hex digits per input byte.
    """
    # bytearray() yields integers on both Python 2 and Python 3, so no ord()
    # call is needed; join() avoids repeated string concatenation.
    return "".join(["%02x" % byte for byte in bytearray(sha)])
61
0.211.20 by Jelmer Vernooij
Change project name to dulwich everywhere, add assertion.
62
MAX_MMAP_SIZE = 256 * 1024 * 1024


class ArraySkipper(object):
    """Read-only view of a buffer that hides its first ``offset`` bytes.

    Index 0 of the view corresponds to index ``offset`` of the wrapped
    buffer.
    """

    def __init__(self, array, offset):
        self.array = array
        self.offset = offset

    def __getslice__(self, i, j):
        # Python 2 simple-slice hook; only used by Python 2.
        return self.array[i+self.offset:j+self.offset]

    def __getitem__(self, i):
        if isinstance(i, slice):
            start, stop, step = i.indices(len(self))
            if step == 1:
                return self.array[start+self.offset:stop+self.offset]
            # Uncommon: for stepped slices, materialise the visible part
            # and let the underlying type apply the slice.
            return self.array[self.offset:][i]
        if i < 0:
            # Negative indices count from the end of the *view*; without
            # this adjustment they would reach into the hidden prefix.
            i += len(self)
        if not 0 <= i < len(self):
            raise IndexError("index out of range")
        return self.array[i+self.offset]

    def __len__(self):
        return len(self.array) - self.offset

    def __str__(self):
        return str(self.array[self.offset:])

    def __bytes__(self):
        return bytes(self.array[self.offset:])


def simple_mmap(f, offset, size, access=mmap.ACCESS_READ):
    """Map ``size`` bytes of file ``f`` starting at byte ``offset``.

    :param f: Open file object; must provide fileno().
    :param offset: Offset in the file of the first byte to expose.
    :param size: Number of bytes to expose.
    :param access: mmap access mode, read-only by default.
    :return: The mmap object itself when no leading bytes need to be hidden,
        otherwise an ArraySkipper wrapping a larger map.
    :raise AssertionError: If this Python cannot pass an offset to mmap()
        and the requested region ends beyond MAX_MMAP_SIZE.
    """
    if supports_mmap_offset:
        # mmap() only accepts offsets that are a multiple of the allocation
        # granularity: map from the preceding boundary and hide the excess.
        skip = offset % mmap.ALLOCATIONGRANULARITY
        mem = mmap.mmap(f.fileno(), size + skip, access=access,
                        offset=offset - skip)
    else:
        if offset + size > MAX_MMAP_SIZE:
            raise AssertionError("%s is larger than 256 meg, and this version "
                "of Python does not support the offset argument to mmap()."
                % getattr(f, "name", f))
        # No offset support: map from the start of the file and hide
        # everything before the requested offset.
        skip = offset
        mem = mmap.mmap(f.fileno(), size + offset, access=access)
    if skip == 0:
        return mem
    return ArraySkipper(mem, skip)
0.211.17 by Jelmer Vernooij
Add simple mmap wrapper.
93
94
0.211.9 by James Westby
Add some basic pack handling code.
95
def multi_ord(map, start, count):
    """Decode ``count`` bytes of ``map`` as a big-endian unsigned integer.

    :param map: Sliceable byte buffer (byte string, mmap, ...).
    :param start: Index of the most significant byte.
    :param count: Number of bytes to decode.
    :return: The decoded integer; 0 when ``count`` is 0.
    :raise IndexError: If fewer than ``count`` bytes are available.
    """
    # bytearray() yields integers on both Python 2 and Python 3.
    chunk = bytearray(map[start:start+count])
    if len(chunk) < count:
        raise IndexError("need %d bytes at offset %d, only %d available"
                         % (count, start, len(chunk)))
    value = 0
    for byte in chunk:
        value = value * 0x100 + byte
    return value
100
0.211.9 by James Westby
Add some basic pack handling code.
101
102
class PackIndex(object):
    """An index in to a packfile.

    Given a sha id of an object a pack index can tell you the location in the
    packfile of that object if it has it.

    The index starts (after an 8 byte header in version 2) with a fan-out
    table of 256 big-endian 4 byte integers. Entry N holds the number of
    objects whose sha id starts with a byte <= N, so entries N-1 and N bound
    the group of objects whose first byte is N (the group for byte 0 starts
    at 0). The names are sorted, so the group is bisected to find the object.
    """

    PACK_INDEX_HEADER_SIZE = 0x100 * 4
    sha_bytes = 20
    record_size = sha_bytes + 4

    def __init__(self, filename):
        """Create a pack index object.

        :param filename: Path of the index file. It is opened and mapped
            immediately and stays mapped until close() is called.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a pack index" % filename
        # Take the size now, so it can be checked on each lookup to ensure
        # that the file hasn't changed.
        self._size = os.path.getsize(filename)
        # The index is binary data; never open it in text mode.
        self._file = open(filename, 'rb')
        self._contents = simple_mmap(self._file, 0, self._size)
        if self._contents[:4] != b'\377tOc':
            # No magic number: version 1, which starts with the fan-out table.
            self.version = 1
            self._fan_out_table = self._read_fan_out_table(0)
        else:
            (self.version, ) = struct.unpack_from(">L", self._contents, 4)
            assert self.version in (2,), "Version was %d" % self.version
            self._fan_out_table = self._read_fan_out_table(8)
            # Version 2 stores names, CRC32s and offsets in separate tables.
            self._name_table_offset = 8 + 0x100 * 4
            self._crc32_table_offset = self._name_table_offset + 20 * len(self)
            self._pack_offset_table_offset = (self._crc32_table_offset +
                                              4 * len(self))
            self._pack_offset_largetable_offset = (
                self._pack_offset_table_offset + 4 * len(self))

    def close(self):
        """Release the memory map and close the index file."""
        self._contents.close()
        self._file.close()

    def __len__(self):
        """Return the number of entries in this pack index."""
        return self._fan_out_table[-1]

    def _unpack_entry(self, i):
        """Unpack the i-th entry in the index file.

        :return: Tuple with object name (SHA), offset in pack file and
              CRC32 checksum (None for version 1 indexes).
        """
        if self.version == 1:
            (offset, name) = struct.unpack_from(">L20s", self._contents,
                self.PACK_INDEX_HEADER_SIZE + (i * self.record_size))
            return (name, offset, None)
        else:
            return (self._unpack_name(i), self._unpack_offset(i),
                    self._unpack_crc32_checksum(i))

    def _unpack_name(self, i):
        """Return the binary object name of the i-th entry."""
        if self.version == 1:
            return self._unpack_entry(i)[0]
        else:
            return struct.unpack_from("20s", self._contents,
                                      self._name_table_offset + i * 20)[0]

    def _unpack_offset(self, i):
        """Return the offset in the pack file of the i-th entry."""
        if self.version == 1:
            return self._unpack_entry(i)[1]
        (offset,) = struct.unpack_from(">L", self._contents,
                                       self._pack_offset_table_offset + i * 4)
        if offset & 0x80000000:
            # Most significant bit set: the remaining 31 bits index the
            # table of 8 byte offsets that follows the 4 byte offset table.
            (offset,) = struct.unpack_from(">Q", self._contents,
                self._pack_offset_largetable_offset +
                (offset & 0x7fffffff) * 8)
        return offset

    def _unpack_crc32_checksum(self, i):
        """Return the stored CRC32 of the i-th entry (None for version 1)."""
        if self.version == 1:
            return None
        else:
            return struct.unpack_from(">L", self._contents,
                                      self._crc32_table_offset + i * 4)[0]

    def iterentries(self):
        """Iterate over the entries in this pack index.

        Will yield tuples with object name, offset in packfile and crc32
        checksum.
        """
        for i in range(len(self)):
            yield self._unpack_entry(i)

    def _read_fan_out_table(self, start_offset):
        """Read the 256 entry fan-out table found at start_offset.

        :return: List of 256 cumulative object counts.
        """
        return list(struct.unpack_from(">256L", self._contents, start_offset))

    def check(self):
        """Check that the stored checksum matches the actual checksum."""
        return self.calculate_checksum() == self.get_stored_checksums()[1]

    def calculate_checksum(self):
        """Return the SHA1 of the index, excluding its trailing checksum."""
        return hashlib.sha1(self._contents[:-20]).digest()

    def get_stored_checksums(self):
        """Return the SHA1 checksums stored for the corresponding packfile
        and this index file itself, in that order."""
        return bytes(self._contents[-40:-20]), bytes(self._contents[-20:])

    def object_index(self, sha):
        """Return the index in to the corresponding packfile for the object.

        Given the (hex) name of an object it will return the offset that
        object lives at within the corresponding pack file. If the pack file
        doesn't have the object then None will be returned.
        """
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack index %s has changed size, I don't " \
             "like that" % self._filename
        return self._object_index(sha)

    def _object_index(self, hexsha):
        """See object_index"""
        return self._find_offset(hex_to_sha(hexsha))

    def _find_offset(self, sha):
        """Bisect the index for a binary object name.

        :param sha: Binary (20 byte) object name.
        :return: Offset of the object in the pack file, or None if the
            object is not in this index.
        """
        # bytearray() yields an integer on both Python 2 and Python 3.
        first = bytearray(sha[:1])[0]
        # The group for byte 0 starts at entry 0; indexing the fan-out table
        # with -1 would wrongly pick up the total object count instead.
        if first == 0:
            start = 0
        else:
            start = self._fan_out_table[first - 1]
        end = self._fan_out_table[first]
        # Invariant: a match, if any, lies in the half-open range [start, end).
        while start < end:
            mid = (start + end) // 2
            file_sha = self._unpack_name(mid)
            if file_sha == sha:
                return self._unpack_offset(mid)
            elif file_sha < sha:
                start = mid + 1
            else:
                end = mid
        return None
0.211.9 by James Westby
Add some basic pack handling code.
242
243
244
class PackData(object):
    """The data contained in a packfile.

    Pack files can be accessed both sequentially for exploding a pack, and
    directly with the help of an index to retrieve a specific object.

    The objects within are either complete or a delta against another.

    The header is variable length. If the MSB of each byte is set then it
    indicates that the subsequent byte is still part of the header.
    For the first byte the next MS bits are the type, which tells you the type
    of object, and whether it is a delta. The LS four bits are the lowest
    bits of the size. For each subsequent byte the LS 7 bits are the next MS
    bits of the size, i.e. the last byte of the header contains the MS bits
    of the size.

    For the complete objects the data is stored as zlib deflated data.
    The size in the header is the uncompressed object size, so to uncompress
    you need to just keep feeding data to zlib until you get an object back,
    or it errors on bad data. This is done here by just giving the complete
    buffer from the start of the deflated object on.

    Currently there are no integrity checks done on individual objects. Also
    no attempt is made to try and detect the delta case, or a request for an
    object at the wrong position.
    """

    def __init__(self, filename):
        """Create a PackData object that represents the pack in the given
        filename.

        The file must exist and stay readable until the object is disposed
        of. It must also stay the same size. It is reopened whenever needed.

        :param filename: Path of the pack file.
        """
        self._filename = filename
        assert os.path.exists(filename), "%s is not a packfile" % filename
        self._size = os.path.getsize(filename)
        self._read_header()

    def _read_header(self):
        """Read the 12 byte pack header and the trailing 20 byte checksum."""
        f = open(self._filename, 'rb')
        try:
            header = f.read(12)
            f.seek(self._size-20)
            self._stored_checksum = f.read(20)
        finally:
            f.close()
        assert header[:4] == b"PACK"
        (version,) = struct.unpack_from(">L", header, 4)
        assert version in (2, 3), "Version was %d" % version
        (self._num_objects,) = struct.unpack_from(">L", header, 8)

    def __len__(self):
        """Returns the number of objects in this pack."""
        return self._num_objects

    def calculate_checksum(self):
        """Return the SHA1 of the pack, excluding its trailing checksum."""
        sha = hashlib.sha1()
        remaining = max(self._size - 20, 0)
        f = open(self._filename, 'rb')
        try:
            # Hash in chunks so the whole pack never has to be held in
            # memory at once.
            while remaining > 0:
                chunk = f.read(min(remaining, 1024 * 1024))
                if not chunk:
                    break
                sha.update(chunk)
                remaining -= len(chunk)
        finally:
            f.close()
        return sha.digest()

    def check(self):
        """Check that the stored checksum matches the actual checksum."""
        return (self.calculate_checksum() == self._stored_checksum)

    def get_object_at(self, offset):
        """Given an offset in to the packfile return the object that is there.

        Using the associated index the location of an object can be looked
        up, and then the packfile can be asked directly for that object using
        this function.

        Currently only non-delta objects are supported.

        :param offset: Integer offset of the object header in the pack file.
        """
        # Integer types (int, and long on Python 2) provide __index__;
        # floats and strings do not.
        assert hasattr(offset, "__index__"), "offset must be an integer"
        size = os.path.getsize(self._filename)
        assert size == self._size, "Pack data %s has changed size, I don't " \
             "like that" % self._filename
        f = open(self._filename, 'rb')
        try:
            map = simple_mmap(f, offset, size-offset)
            return self._get_object_at(map)
        finally:
            f.close()

    def _get_object_at(self, map):
        """Parse the object whose header starts at index 0 of ``map``.

        :param map: Sliceable byte buffer starting at the object header.
        :return: Whatever ShaFile.from_raw_string() builds from the type
            number and the decompressed data.
        """
        def byte_at(i):
            # bytearray() yields an integer on both Python 2 and Python 3.
            return bytearray(map[i:i+1])[0]

        first_byte = byte_at(0)
        sign_extend = first_byte & 0x80
        obj_type = (first_byte >> 4) & 0x07
        # NOTE(review): the declared size is decoded but not checked against
        # the decompressed data.
        size = first_byte & 0x0f
        cur_offset = 0
        while sign_extend > 0:
            byte = byte_at(cur_offset+1)
            sign_extend = byte & 0x80
            size_part = byte & 0x7f
            size += size_part << ((cur_offset * 7) + 4)
            cur_offset += 1
        raw_base = cur_offset+1
        # The size is the inflated size, so we have no idea what the deflated
        # size is, so for now give it as much as we have.
        raw = map[raw_base:]
        uncomp = _decompress(raw)
        obj = ShaFile.from_raw_string(obj_type, uncomp)
        return obj
354
0.211.22 by Jelmer Vernooij
Read fan-out table always, read file sha1.
355
0.211.34 by Jelmer Vernooij
Implement PackData.check().
356
class SHA1Writer(object):
    """File wrapper that keeps a running SHA1 of everything written.

    close() appends the 20 byte digest to the file before closing it.
    """

    def __init__(self, f):
        """Wrap ``f``, a writable binary file object."""
        self.f = f
        # No initial data: hashlib rejects text strings on Python 3.
        self.sha1 = hashlib.sha1()

    def write(self, data):
        """Write ``data`` to the file and feed it to the running SHA1."""
        self.sha1.update(data)
        self.f.write(data)

    def close(self):
        """Append the SHA1 of all written data, then close the file.

        :return: The 20 byte binary digest that was appended.
        """
        sha = self.sha1.digest()
        assert len(sha) == 20
        self.f.write(sha)
        self.f.close()
        return sha
372
373
0.211.22 by Jelmer Vernooij
Read fan-out table always, read file sha1.
374
def write_pack(filename, objects):
    """Write a new pack file.

    Only the pack header and the trailing checksum are written so far; the
    objects themselves are not serialised yet (see FIXME below).

    :param filename: The filename of the new pack file.
    :param objects: List of objects to write.
    :return: List with (name, offset, crc32 checksum) entries, pack checksum
    """
    # Pack files are binary: text mode would corrupt them on platforms that
    # translate line endings.
    f = SHA1Writer(open(filename, 'wb'))
    entries = []
    f.write(b"PACK")              # Pack header
    f.write(struct.pack(">L", 2)) # Pack version
    f.write(struct.pack(">L", len(objects))) # Number of objects in pack
    for o in objects:
        pass # FIXME: Write object
    return entries, f.close()
0.211.25 by Jelmer Vernooij
Implement pack index writing.
390
391
0.211.31 by Jelmer Vernooij
More work on v2 packs.
392
def write_pack_index_v1(filename, entries, pack_checksum):
    """Write a new version 1 pack index file.

    Layout: 256 entry fan-out table, one (4 byte offset, 20 byte name) record
    per object sorted by name, the pack checksum, and finally the SHA1 of
    everything before it (appended by SHA1Writer.close()).

    :param filename: The filename of the new pack index file.
    :param entries: List of tuples with object name (sha), offset_in_pack, and
            crc32_checksum. The checksum is not stored in version 1 indexes.
    :param pack_checksum: Checksum of the pack file (20 bytes).
    """
    # Sort entries first
    entries = sorted(entries)
    # Index files are binary: never open them in text mode.
    f = SHA1Writer(open(filename, 'wb'))
    # Count the objects per leading name byte. bytearray() yields an integer
    # on both Python 2 and Python 3.
    counts = [0] * 0x100
    for (name, offset, entry_checksum) in entries:
        counts[bytearray(name[:1])[0]] += 1
    # Fan-out table: entry N is the number of objects whose name starts with
    # a byte <= N.
    total = 0
    for count in counts:
        total += count
        f.write(struct.pack(">L", total))
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">L20s", offset, name))
    assert len(pack_checksum) == 20
    f.write(pack_checksum)
    f.close()
0.211.31 by Jelmer Vernooij
More work on v2 packs.
417
418
419
def write_pack_index_v2(filename, entries, pack_checksum):
    """Write a new version 2 pack index file.

    Layout: magic number and version, 256 entry fan-out table, table of
    sorted object names, table of CRC32 checksums, table of 4 byte pack
    offsets, table of 8 byte offsets for objects that do not fit in 31 bits,
    the pack checksum, and finally the SHA1 of everything before it
    (appended by SHA1Writer.close()).

    :param filename: The filename of the new pack index file.
    :param entries: List of tuples with object name (sha), offset_in_pack, and
            crc32_checksum.
    :param pack_checksum: Checksum of the pack file (20 bytes).
    """
    # Sort entries first
    entries = sorted(entries)
    # Index files are binary: never open them in text mode.
    f = SHA1Writer(open(filename, 'wb'))
    f.write(b'\377tOc')
    f.write(struct.pack(">L", 2))
    # Count the objects per leading name byte. bytearray() yields an integer
    # on both Python 2 and Python 3.
    counts = [0] * 0x100
    for (name, offset, entry_checksum) in entries:
        counts[bytearray(name[:1])[0]] += 1
    # Fan-out table: entry N is the number of objects whose name starts with
    # a byte <= N.
    total = 0
    for count in counts:
        total += count
        f.write(struct.pack(">L", total))
    for (name, offset, entry_checksum) in entries:
        f.write(name)
    for (name, offset, entry_checksum) in entries:
        f.write(struct.pack(">L", entry_checksum))
    # Offsets that need more than 31 bits go in a separate 8 byte table; the
    # 4 byte slot then holds the index in that table with the MSB set.
    large_offsets = []
    for (name, offset, entry_checksum) in entries:
        if offset < 0x80000000:
            f.write(struct.pack(">L", offset))
        else:
            f.write(struct.pack(">L", 0x80000000 + len(large_offsets)))
            large_offsets.append(offset)
    for offset in large_offsets:
        f.write(struct.pack(">Q", offset))
    assert len(pack_checksum) == 20
    f.write(pack_checksum)
    f.close()
451