1
# objects.py -- Access to base git objects
2
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3
# The header parsing code is based on that from git itself, which is
4
# Copyright (C) 2005 Linus Torvalds
5
# and licensed under v2 of the GPL.
7
# This program is free software; you can redistribute it and/or
8
# modify it under the terms of the GNU General Public License
9
# as published by the Free Software Foundation; version 2
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
# GNU General Public License for more details.
17
# You should have received a copy of the GNU General Public License
18
# along with this program; if not, write to the Free Software
19
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27
from errors import (NotCommitError,
37
committer_id = "committer"
39
def _decompress(string):
40
dcomp = zlib.decompressobj()
41
dcomped = dcomp.decompress(string)
42
dcomped += dcomp.flush()
46
"""Takes a string and returns the hex of the sha within"""
49
hexsha += "%02x" % ord(c)
50
assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
54
class ShaFile(object):
57
def _update_contents(self):
    """Update the _contents from the _text.

    Rebuilds ``self._contents`` as a list holding the ``ord()`` value of
    each character of ``self._text``, in the same order.
    """
    self._contents = [ord(c) for c in self._text]
62
def _parse_legacy_object(cls, map):
63
"""Parse a legacy object, creating it and setting object._text"""
64
text = _decompress(map)
66
for posstype in type_map.keys():
67
if text.startswith(posstype):
68
object = type_map[posstype]()
69
text = text[len(posstype):]
71
assert object is not None, "%s is not a known object type" % text[:9]
72
assert text[0] == ' ', "%s is not a space" % text[0]
76
while text[0] >= '0' and text[0] <= '9':
77
if i > 0 and size == 0:
78
assert False, "Size is not in canonical format"
79
size = (size * 10) + int(text[0])
83
assert text[0] == "\0", "Size not followed by null"
86
object._update_contents()
90
def _parse_object(cls, map):
91
"""Parse a new style object , creating it and setting object._text"""
95
num_type = (byte >> 4) & 7
97
object = num_type_map[num_type]()
99
assert False, "Not a known type: %d" % num_type
100
while((byte & 0x80) != 0):
101
byte = ord(map[used])
104
object._text = _decompress(raw)
105
object._update_contents()
109
def _parse_file(cls, map):
110
word = (ord(map[0]) << 8) + ord(map[1])
111
if ord(map[0]) == 0x78 and (word % 31) == 0:
112
return cls._parse_legacy_object(map)
114
return cls._parse_object(map)
117
"""Don't call this directly"""
119
def _parse_text(self):
    """For subclasses to do initialisation time parsing.

    The base implementation does nothing. ``from_file`` calls this hook on
    the object returned by ``_parse_file``.
    """
123
def from_file(cls, filename):
124
"""Get the contents of a SHA file on disk"""
125
size = os.path.getsize(filename)
126
f = open(filename, 'rb')
128
map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
129
shafile = cls._parse_file(map)
130
shafile._parse_text()
136
def from_raw_string(cls, type, string):
137
"""Creates an object of the indicated type from the raw string given.
139
Type is the numeric type of an object. String is the raw uncompressed
142
real_class = num_type_map[type]
145
obj._update_contents()
149
return "%s %lu\0" % (self._type, len(self._contents))
152
"""The raw bytes of this object"""
153
return self._contents
156
"""The SHA1 object that is the name of this object."""
158
ressha.update(self._header())
159
ressha.update(self._text)
162
def __eq__(self, other):
163
"""Return true id the sha of the two objects match.
165
The __le__ etc methods aren't overridden as they make no sense,
166
certainly at this level.
168
return self.sha().digest() == other.sha().digest()
172
"""A Git Blob object."""
177
"""The text contained within the blob object."""
181
def from_file(cls, filename):
182
blob = ShaFile.from_file(filename)
183
if blob._type != cls._type:
184
raise NotBlobError(filename)
188
def from_string(cls, string):
189
"""Create a blob from a string."""
191
shafile._text = string
192
shafile._update_contents()
197
"""A Git tree object"""
202
def from_file(cls, filename):
203
tree = ShaFile.from_file(filename)
204
if tree._type != cls._type:
205
raise NotTreeError(filename)
209
"""Reutrn a list of tuples describing the tree entries"""
212
def _parse_text(self):
213
"""Grab the entries in the tree"""
216
while count < len(self._text):
218
chr = self._text[count]
220
assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
221
mode = (mode << 3) + (ord(chr) - ord('0'))
223
chr = self._text[count]
225
chr = self._text[count]
230
chr = self._text[count]
232
chr = self._text[count]
233
sha = self._text[count:count+20]
234
hexsha = sha_to_hex(sha)
235
self._entries.append((mode, name, hexsha))
238
class Commit(ShaFile):
239
"""A git commit object"""
244
def from_file(cls, filename):
245
commit = ShaFile.from_file(filename)
246
if commit._type != cls._type:
247
raise NotCommitError(filename)
250
def _parse_text(self):
253
assert text.startswith(tree_id), "Invlid commit object, " \
254
"must start with %s" % tree_id
255
count += len(tree_id)
256
assert text[count] == ' ', "Invalid commit object, " \
257
"%s must be followed by space not %s" % (tree_id, text[count])
259
self._tree = text[count:count+40]
261
assert text[count] == "\n", "Invalid commit object, " \
262
"tree sha must be followed by newline"
265
while text[count:].startswith(parent_id):
266
count += len(parent_id)
267
assert text[count] == ' ', "Invalid commit object, " \
268
"%s must be followed by space not %s" % (parent_id, text[count])
270
self._parents.append(text[count:count+40])
272
assert text[count] == "\n", "Invalid commit object, " \
273
"parent sha must be followed by newline"
276
if text[count:].startswith(author_id):
277
count += len(author_id)
278
assert text[count] == ' ', "Invalid commit object, " \
279
"%s must be followed by space not %s" % (author_id, text[count])
282
while text[count] != '>':
283
assert text[count] != '\n', "Malformed author information"
284
self._author += text[count]
286
self._author += text[count]
288
while text[count] != '\n':
291
self._committer = None
292
if text[count:].startswith(committer_id):
293
count += len(committer_id)
294
assert text[count] == ' ', "Invalid commit object, " \
295
"%s must be followed by space not %s" % (committer_id, text[count])
298
while text[count] != '>':
299
assert text[count] != '\n', "Malformed committer information"
300
self._committer += text[count]
302
self._committer += text[count]
304
assert text[count] == ' ', "Invalid commit object, " \
305
"commiter information must be followed by space not %s" % text[count]
307
self._commit_time = int(text[count:count+10])
308
while text[count] != '\n':
311
assert text[count] == '\n', "There must be a new line after the headers"
313
# XXX: There can be an encoding field.
314
self._message = text[count:]
317
"""Returns the tree that is the state of this commit"""
321
"""Return a list of parents of this commit."""
325
"""Returns the name of the author of the commit"""
329
"""Returns the name of the committer of the commit"""
330
return self._committer
333
"""Returns the commit message"""
336
def commit_time(self):
337
"""Returns the timestamp of the commit.
339
Returns it as the number of seconds since the epoch.
341
return self._commit_time