1
# objects.py -- Access to base git objects
2
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3
# The header parsing code is based on that from git itself, which is
4
# Copyright (C) 2005 Linus Torvalds
5
# and licensed under v2 of the GPL.
7
# This program is free software; you can redistribute it and/or
8
# modify it under the terms of the GNU General Public License
9
# as published by the Free Software Foundation; version 2
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
# GNU General Public License for more details.
17
# You should have received a copy of the GNU General Public License
18
# along with this program; if not, write to the Free Software
19
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27
from errors import (NotCommitError,
38
COMMITTER_ID = "committer"
40
def _decompress(string):
41
dcomp = zlib.decompressobj()
42
dcomped = dcomp.decompress(string)
43
dcomped += dcomp.flush()
47
"""Takes a string and returns the hex of the sha within"""
50
hexsha += "%02x" % ord(c)
51
assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
56
class ShaFile(object):
60
def _parse_legacy_object(cls, map):
61
"""Parse a legacy object, creating it and setting object._text"""
62
text = _decompress(map)
64
for posstype in type_map.keys():
65
if text.startswith(posstype):
66
object = type_map[posstype]()
67
text = text[len(posstype):]
69
assert object is not None, "%s is not a known object type" % text[:9]
70
assert text[0] == ' ', "%s is not a space" % text[0]
74
while text[0] >= '0' and text[0] <= '9':
75
if i > 0 and size == 0:
76
assert False, "Size is not in canonical format"
77
size = (size * 10) + int(text[0])
81
assert text[0] == "\0", "Size not followed by null"
86
def as_raw_string(self):
    """Return this object as a ``(numeric type, raw text)`` pair.

    The pair is built from ``self._num_type`` and ``self._text`` exactly
    as they are stored on the instance; nothing is copied or re-encoded.
    """
    return self._num_type, self._text
90
def _parse_object(cls, map):
91
"""Parse a new style object , creating it and setting object._text"""
95
num_type = (byte >> 4) & 7
97
object = num_type_map[num_type]()
99
assert False, "Not a known type: %d" % num_type
100
while((byte & 0x80) != 0):
101
byte = ord(map[used])
104
object._text = _decompress(raw)
108
def _parse_file(cls, map):
    """Pick the parser for a raw object file and return its result.

    The first two bytes are read as a big-endian 16-bit word. When the
    first byte is 0x78 and that word is a multiple of 31, the data is
    handed to ``cls._parse_legacy_object``; otherwise it is handed to
    ``cls._parse_object``.

    NOTE(review): the 0x78 / multiple-of-31 test looks like a zlib stream
    header check (RFC 1950) -- confirm against the git object format.
    """
    first = ord(map[0])
    word = (first << 8) + ord(map[1])
    if first == 0x78 and (word % 31) == 0:
        return cls._parse_legacy_object(map)
    return cls._parse_object(map)
116
"""Don't call this directly"""
118
def _parse_text(self):
119
"""For subclasses to do initialisation time parsing"""
122
def from_file(cls, filename):
123
"""Get the contents of a SHA file on disk"""
124
size = os.path.getsize(filename)
125
f = open(filename, 'rb')
127
map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
128
shafile = cls._parse_file(map)
129
shafile._parse_text()
135
def from_raw_string(cls, type, string):
136
"""Creates an object of the indicated type from the raw string given.
138
Type is the numeric type of an object. String is the raw uncompressed
141
real_class = num_type_map[type]
149
return "%s %lu\0" % (self._type, len(self._text))
152
return zlib.crc32(self._text)
155
"""The SHA1 object that is the name of this object."""
157
ressha.update(self._header())
158
ressha.update(self._text)
163
return self.sha().hexdigest()
166
return "<%s %s>" % (self.__class__.__name__, self.id)
168
def __eq__(self, other):
    """Return true if the sha of the two objects match.

    The __le__ etc methods aren't overridden as they make no sense,
    certainly at this level.
    """
    # Compare the raw digests of the two sha objects rather than the
    # objects themselves.
    return self.sha().digest() == other.sha().digest()
178
"""A Git Blob object."""
184
"""The text contained within the blob object."""
188
def from_file(cls, filename):
189
blob = ShaFile.from_file(filename)
190
if blob._type != cls._type:
191
raise NotBlobError(filename)
195
def from_string(cls, string):
196
"""Create a blob from a string."""
198
shafile._text = string
203
"""A Git Tag object."""
208
def from_file(cls, filename):
209
blob = ShaFile.from_file(filename)
210
if blob._type != cls._type:
211
raise NotBlobError(filename)
215
def from_string(cls, string):
216
"""Create a blob from a string."""
218
shafile._text = string
223
"""A Git tree object"""
228
def from_file(cls, filename):
229
tree = ShaFile.from_file(filename)
230
if tree._type != cls._type:
231
raise NotTreeError(filename)
235
"""Return a list of tuples describing the tree entries"""
238
def _parse_text(self):
239
"""Grab the entries in the tree"""
242
while count < len(self._text):
244
chr = self._text[count]
246
assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
247
mode = (mode << 3) + (ord(chr) - ord('0'))
249
chr = self._text[count]
251
chr = self._text[count]
256
chr = self._text[count]
258
chr = self._text[count]
259
sha = self._text[count:count+20]
260
hexsha = sha_to_hex(sha)
261
self._entries.append((mode, name, hexsha))
264
class Commit(ShaFile):
265
"""A git commit object"""
270
def from_file(cls, filename):
271
commit = ShaFile.from_file(filename)
272
if commit._type != cls._type:
273
raise NotCommitError(filename)
276
def _parse_text(self):
279
assert text.startswith(TREE_ID), "Invalid commit object, " \
280
"must start with %s" % TREE_ID
281
count += len(TREE_ID)
282
assert text[count] == ' ', "Invalid commit object, " \
283
"%s must be followed by space not %s" % (TREE_ID, text[count])
285
self._tree = text[count:count+40]
287
assert text[count] == "\n", "Invalid commit object, " \
288
"tree sha must be followed by newline"
291
while text[count:].startswith(PARENT_ID):
292
count += len(PARENT_ID)
293
assert text[count] == ' ', "Invalid commit object, " \
294
"%s must be followed by space not %s" % (PARENT_ID, text[count])
296
self._parents.append(text[count:count+40])
298
assert text[count] == "\n", "Invalid commit object, " \
299
"parent sha must be followed by newline"
302
if text[count:].startswith(AUTHOR_ID):
303
count += len(AUTHOR_ID)
304
assert text[count] == ' ', "Invalid commit object, " \
305
"%s must be followed by space not %s" % (AUTHOR_ID, text[count])
308
while text[count] != '>':
309
assert text[count] != '\n', "Malformed author information"
310
self._author += text[count]
312
self._author += text[count]
314
while text[count] != '\n':
317
self._committer = None
318
if text[count:].startswith(COMMITTER_ID):
319
count += len(COMMITTER_ID)
320
assert text[count] == ' ', "Invalid commit object, " \
321
"%s must be followed by space not %s" % (COMMITTER_ID, text[count])
324
while text[count] != '>':
325
assert text[count] != '\n', "Malformed committer information"
326
self._committer += text[count]
328
self._committer += text[count]
330
assert text[count] == ' ', "Invalid commit object, " \
331
"commiter information must be followed by space not %s" % text[count]
333
self._commit_time = int(text[count:count+10])
334
while text[count] != '\n':
337
assert text[count] == '\n', "There must be a new line after the headers"
339
# XXX: There can be an encoding field.
340
self._message = text[count:]
344
"""Returns the tree that is the state of this commit"""
349
"""Return a list of parents of this commit."""
354
"""Returns the name of the author of the commit"""
359
"""Returns the name of the committer of the commit"""
360
return self._committer
364
"""Returns the commit message"""
368
def commit_time(self):
    """Returns the timestamp of the commit.

    Returns it as the number of seconds since the epoch.
    """
    return self._commit_time
388
# 5 Is reserved for further expansion