1
# objects.py -- Access to base git objects
2
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3
# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
4
# The header parsing code is based on that from git itself, which is
5
# Copyright (C) 2005 Linus Torvalds
6
# and licensed under v2 of the GPL.
8
# This program is free software; you can redistribute it and/or
9
# modify it under the terms of the GNU General Public License
10
# as published by the Free Software Foundation; version 2
13
# This program is distributed in the hope that it will be useful,
14
# but WITHOUT ANY WARRANTY; without even the implied warranty of
15
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
# GNU General Public License for more details.
18
# You should have received a copy of the GNU General Public License
19
# along with this program; if not, write to the Free Software
20
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
28
from errors import (NotCommitError,
39
COMMITTER_ID = "committer"
44
def _decompress(string):
45
dcomp = zlib.decompressobj()
46
dcomped = dcomp.decompress(string)
47
dcomped += dcomp.flush()
51
"""Takes a string and returns the hex of the sha within"""
54
hexsha += "%02x" % ord(c)
55
assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
60
"""Takes a hex sha and returns a binary sha"""
62
for i in range(0, len(hex), 2):
63
sha += chr(int(hex[i:i+2], 16))
64
assert len(sha) == 20, "Incorrent length of sha1: %d" % len(sha)
67
class ShaFile(object):
71
def _parse_legacy_object(cls, map):
72
"""Parse a legacy object, creating it and setting object._text"""
73
text = _decompress(map)
75
for posstype in type_map.keys():
76
if text.startswith(posstype):
77
object = type_map[posstype]()
78
text = text[len(posstype):]
80
assert object is not None, "%s is not a known object type" % text[:9]
81
assert text[0] == ' ', "%s is not a space" % text[0]
85
while text[0] >= '0' and text[0] <= '9':
86
if i > 0 and size == 0:
87
assert False, "Size is not in canonical format"
88
size = (size * 10) + int(text[0])
92
assert text[0] == "\0", "Size not followed by null"
97
def as_raw_string(self):
    """Return this object's numeric type together with its raw text.

    :return: a tuple of (self._num_type, self._text)
    """
    return self._num_type, self._text
101
def _parse_object(cls, map):
102
"""Parse a new style object, creating it and setting object._text"""
104
byte = ord(map[used])
106
num_type = (byte >> 4) & 7
108
object = num_type_map[num_type]()
110
assert False, "Not a known type: %d" % num_type
111
while((byte & 0x80) != 0):
112
byte = ord(map[used])
115
object._text = _decompress(raw)
119
def _parse_file(cls, map):
120
word = (ord(map[0]) << 8) + ord(map[1])
121
if ord(map[0]) == 0x78 and (word % 31) == 0:
122
return cls._parse_legacy_object(map)
124
return cls._parse_object(map)
127
"""Don't call this directly"""
129
def _parse_text(self):
130
"""For subclasses to do initialisation time parsing"""
133
def from_file(cls, filename):
134
"""Get the contents of a SHA file on disk"""
135
size = os.path.getsize(filename)
136
f = open(filename, 'rb')
138
map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
139
shafile = cls._parse_file(map)
140
shafile._parse_text()
146
def from_raw_string(cls, type, string):
147
"""Creates an object of the indicated type from the raw string given.
149
Type is the numeric type of an object. String is the raw uncompressed
152
real_class = num_type_map[type]
160
return "%s %lu\0" % (self._type, len(self._text))
163
return zlib.crc32(self._text)
166
"""The SHA1 object that is the name of this object."""
168
ressha.update(self._header())
169
ressha.update(self._text)
174
return self.sha().hexdigest()
177
return "<%s %s>" % (self.__class__.__name__, self.id)
179
def __eq__(self, other):
180
"""Return true id the sha of the two objects match.
182
The __le__ etc methods aren't overriden as they make no sense,
183
certainly at this level.
185
return self.sha().digest() == other.sha().digest()
189
"""A Git Blob object."""
196
"""The text contained within the blob object."""
200
def from_file(cls, filename):
201
blob = ShaFile.from_file(filename)
202
if blob._type != cls._type:
203
raise NotBlobError(filename)
207
def from_string(cls, string):
208
"""Create a blob from a string."""
210
shafile._text = string
215
"""A Git Tag object."""
220
def from_file(cls, filename):
221
blob = ShaFile.from_file(filename)
222
if blob._type != cls._type:
223
raise NotBlobError(filename)
227
def from_string(cls, string):
228
"""Create a tag from a string."""
230
shafile._text = string
233
def _parse_text(self):
234
"""Grab the metadata attached to the tag"""
237
assert text.startswith(OBJECT_ID), "Invalid tag object, " \
238
"must start with %s" % OBJECT_ID
239
count += len(OBJECT_ID)
240
assert text[count] == ' ', "Invalid tag object, " \
241
"%s must be followed by space not %s" % (OBJECT_ID, text[count])
243
self._object_sha = text[count:count+40]
245
assert text[count] == '\n', "Invalid tag object, " \
246
"%s sha must be followed by newline" % OBJECT_ID
248
assert text[count:].startswith(TYPE_ID), "Invalid tag object, " \
249
"%s sha must be followed by %s" % (OBJECT_ID, TYPE_ID)
250
count += len(TYPE_ID)
251
assert text[count] == ' ', "Invalid tag object, " \
252
"%s must be followed by space not %s" % (TAG_ID, text[count])
254
self._object_type = ""
255
while text[count] != '\n':
256
self._object_type += text[count]
259
assert self._object_type in (COMMIT_ID, BLOB_ID, TREE_ID, TAG_ID), "Invalid tag object, " \
260
"unexpected object type %s" % self._object_type
261
self._object_type = type_map[self._object_type]
263
assert text[count:].startswith(TAG_ID), "Invalid tag object, " \
264
"object type must be followed by %s" % (TAG_ID)
266
assert text[count] == ' ', "Invalid tag object, " \
267
"%s must be followed by space not %s" % (TAG_ID, text[count])
270
while text[count] != '\n':
271
self._name += text[count]
275
assert text[count:].startswith(TAGGER_ID), "Invalid tag object, " \
276
"%s must be followed by %s" % (TAG_ID, TAGGER_ID)
277
count += len(TAGGER_ID)
278
assert text[count] == ' ', "Invalid tag object, " \
279
"%s must be followed by space not %s" % (TAGGER_ID, text[count])
282
while text[count] != '>':
283
assert text[count] != '\n', "Malformed tagger information"
284
self._tagger += text[count]
286
self._tagger += text[count]
288
assert text[count] == ' ', "Invalid tag object, " \
289
"tagger information must be followed by space not %s" % text[count]
291
self._tag_time = int(text[count:count+10])
292
while text[count] != '\n':
295
assert text[count] == '\n', "There must be a new line after the headers"
297
self._message = text[count:]
301
"""Returns the object pointed by this tag, represented as a tuple(type, sha)"""
302
return (self._object_type, self._object_sha)
306
"""Returns the name of this tag"""
311
"""Returns the name of the person who created this tag"""
316
"""Returns the creation timestamp of the tag.
318
Returns it as the number of seconds since the epoch"""
319
return self._tag_time
323
"""Returns the message attached to this tag"""
328
"""A Git tree object"""
337
def from_file(cls, filename):
338
tree = ShaFile.from_file(filename)
339
if tree._type != cls._type:
340
raise NotTreeError(filename)
343
def add(self, mode, name, hexsha):
    """Append an entry to this tree's list of entries.

    :param mode: the mode of the entry
    :param name: the name of the entry
    :param hexsha: the sha of the entry, as a hex string
    """
    self._entries.append((mode, name, hexsha))
347
"""Return a list of tuples describing the tree entries"""
350
def _parse_text(self):
351
"""Grab the entries in the tree"""
353
while count < len(self._text):
355
chr = self._text[count]
357
assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
358
mode = (mode << 3) + (ord(chr) - ord('0'))
360
chr = self._text[count]
362
chr = self._text[count]
367
chr = self._text[count]
369
chr = self._text[count]
370
sha = self._text[count:count+20]
371
hexsha = sha_to_hex(sha)
372
self.add(mode, name, hexsha)
377
for mode, name, hexsha in self._entries:
378
self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
381
class Commit(ShaFile):
382
"""A git commit object"""
391
def from_file(cls, filename):
392
commit = ShaFile.from_file(filename)
393
if commit._type != cls._type:
394
raise NotCommitError(filename)
397
def _parse_text(self):
400
assert text.startswith(TREE_ID), "Invalid commit object, " \
401
"must start with %s" % TREE_ID
402
count += len(TREE_ID)
403
assert text[count] == ' ', "Invalid commit object, " \
404
"%s must be followed by space not %s" % (TREE_ID, text[count])
406
self._tree = text[count:count+40]
408
assert text[count] == "\n", "Invalid commit object, " \
409
"tree sha must be followed by newline"
412
while text[count:].startswith(PARENT_ID):
413
count += len(PARENT_ID)
414
assert text[count] == ' ', "Invalid commit object, " \
415
"%s must be followed by space not %s" % (PARENT_ID, text[count])
417
self._parents.append(text[count:count+40])
419
assert text[count] == "\n", "Invalid commit object, " \
420
"parent sha must be followed by newline"
423
if text[count:].startswith(AUTHOR_ID):
424
count += len(AUTHOR_ID)
425
assert text[count] == ' ', "Invalid commit object, " \
426
"%s must be followed by space not %s" % (AUTHOR_ID, text[count])
429
while text[count] != '>':
430
assert text[count] != '\n', "Malformed author information"
431
self._author += text[count]
433
self._author += text[count]
435
while text[count] != '\n':
438
self._committer = None
439
if text[count:].startswith(COMMITTER_ID):
440
count += len(COMMITTER_ID)
441
assert text[count] == ' ', "Invalid commit object, " \
442
"%s must be followed by space not %s" % (COMMITTER_ID, text[count])
445
while text[count] != '>':
446
assert text[count] != '\n', "Malformed committer information"
447
self._committer += text[count]
449
self._committer += text[count]
451
assert text[count] == ' ', "Invalid commit object, " \
452
"commiter information must be followed by space not %s" % text[count]
454
self._commit_time = int(text[count:count+10])
455
while text[count] != '\n':
458
assert text[count] == '\n', "There must be a new line after the headers"
460
# XXX: There can be an encoding field.
461
self._message = text[count:]
465
self._text += "%s %s\n" % (TREE_ID, self._tree)
466
for p in self._parents:
467
self._text += "%s %s\n" % (PARENT_ID, p)
468
self._text += "%s %s %s +0000\n" % (AUTHOR_ID, self._author, str(self._commit_time))
469
self._text += "%s %s %s +0000\n" % (COMMITTER_ID, self._committer, str(self._commit_time))
470
self._text += "\n" # There must be a new line after the headers
471
self._text += self._message
475
"""Returns the tree that is the state of this commit"""
480
"""Return a list of parents of this commit."""
485
"""Returns the name of the author of the commit"""
490
"""Returns the name of the committer of the commit"""
491
return self._committer
495
"""Returns the commit message"""
499
def commit_time(self):
    """Returns the timestamp of the commit.

    Returns it as the number of seconds since the epoch.
    """
    return self._commit_time
520
# 5 Is reserved for further expansion