/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.211.2 by James Westby
Make it more like a real project.
1
# objects.py -- Access to base git objects
2
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
3
# The header parsing code is based on that from git itself, which is
4
# Copyright (C) 2005 Linus Torvalds
5
# and licensed under v2 of the GPL.
6
# 
7
# This program is free software; you can redistribute it and/or
8
# modify it under the terms of the GNU General Public License
9
# as published by the Free Software Foundation; version 2
10
# of the License.
11
# 
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
# GNU General Public License for more details.
16
# 
17
# You should have received a copy of the GNU General Public License
18
# along with this program; if not, write to the Free Software
19
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
# MA  02110-1301, USA.
21
0.211.1 by James Westby
Start the python-git project.
22
import mmap
23
import os
24
import sha
25
import zlib
26
27
# Type names as they appear at the start of a legacy object header.
blob_id = "blob"
tree_id = "tree"
commit_id = "commit"

# Header keywords looked for when parsing the text of a commit object.
parent_id = "parent"
author_id = "author"
committer_id = "committer"
33
34
def _decompress(string):
    """Inflate zlib-compressed data and return the decompressed result.

    :param string: the compressed data (anything zlib can read from).
    :return: the decompressed data, including whatever the decompressor
        was still holding back when the input ran out.
    """
    inflater = zlib.decompressobj()
    # flush() hands back any output the decompress() call kept buffered.
    return inflater.decompress(string) + inflater.flush()
39
40
def sha_to_hex(sha):
  """Takes a string and returns the hex of the sha within

  :param sha: the 20 raw characters of a SHA-1 digest.
  :return: the 40 character lowercase hexadecimal form.
  :raises AssertionError: if the result is not 40 characters long, i.e.
      the input did not hold exactly 20 characters.
  """
  # "%02x" zero-pads values below 16, so no special casing is needed.
  hexsha = ''.join(["%02x" % ord(c) for c in sha])
  assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
         len(hexsha)
  return hexsha
51
52
class ShaFile(object):
  """A git SHA file.

  Base class for the object types handled by this module.  Instances are
  produced by the ``from_file`` class method (or by a constructor offered
  by a subclass) rather than by calling the class directly.  A populated
  instance carries:

    _text      the object payload as a string
    _contents  the same payload as a list of character ordinals

  Subclasses supply ``_type``, the type name written into the header.
  """

  def _update_contents(self):
    """Update the _contents from the _text"""
    self._contents = [ord(c) for c in self._text]

  @classmethod
  def _parse_legacy_object(cls, map):
    """Parse a legacy object, creating it and setting object._text

    The whole of ``map`` is decompressed and must then read
    ``<type name> <decimal size>NUL<payload>``.  The type name selects
    the class to instantiate through ``type_map``.

    :param map: the compressed data of the object.
    :return: a new instance with ``_size``, ``_text`` and ``_contents``
        set.
    :raises AssertionError: if the header is malformed.
    """
    text = _decompress(map)
    obj = None
    pos = 0
    for posstype in type_map:
      if text.startswith(posstype):
        obj = type_map[posstype]()
        pos = len(posstype)
        break
    assert obj is not None, "%s is not a known object type" % text[:9]
    assert text[pos] == ' ', "%s is not a space" % text[pos]
    pos += 1
    # Walk the decimal size with an index instead of re-slicing the whole
    # text once per header character.
    size = 0
    first_digit = pos
    while text[pos] >= '0' and text[pos] <= '9':
      if pos > first_digit and size == 0:
        # A leading zero followed by more digits.  Raised explicitly so
        # the check is not dropped when Python runs with -O.
        raise AssertionError("Size is not in canonical format")
      size = (size * 10) + int(text[pos])
      pos += 1
    obj._size = size
    assert text[pos] == "\0", "Size not followed by null"
    obj._text = text[pos + 1:]
    obj._update_contents()
    return obj

  @classmethod
  def _parse_object(cls, map):
    """Parse a new style object , creating it and setting object._text

    Bits 4-6 of the first byte select the class through ``num_type_map``.
    Bytes are then skipped for as long as the high bit of the previous
    byte is set, and everything after that is decompressed as the payload.

    :param map: the raw data of the object.
    :return: a new instance with ``_text`` and ``_contents`` set.
    :raises AssertionError: if the type number is not in ``num_type_map``.
    """
    used = 0
    byte = ord(map[used])
    used += 1
    num_type = (byte >> 4) & 7
    try:
      obj = num_type_map[num_type]()
    except KeyError:
      # Raised explicitly: an ``assert False`` here would vanish under -O
      # and leave ``obj`` unbound.
      raise AssertionError("Not a known type: %d" % num_type)
    # Skip the remaining header bytes; the value they encode is not used.
    while (byte & 0x80) != 0:
      byte = ord(map[used])
      used += 1
    raw = map[used:]
    obj._text = _decompress(raw)
    obj._update_contents()
    return obj

  @classmethod
  def _parse_file(cls, map):
    """Pick the parser matching the format of the data in ``map``.

    Data whose first byte is 0x78 and whose first two bytes, read as a
    big-endian 16 bit number, are a multiple of 31 is handed to the legacy
    parser; anything else goes to the new style parser.
    """
    word = (ord(map[0]) << 8) + ord(map[1])
    if ord(map[0]) == 0x78 and (word % 31) == 0:
      return cls._parse_legacy_object(map)
    else:
      return cls._parse_object(map)

  def __init__(self):
    """Don't call this directly"""

  def _parse_text(self):
    """For subclasses to do initialisation time parsing"""

  @classmethod
  def from_file(cls, filename):
    """Get the contents of a SHA file on disk

    :param filename: path of the object file to load.
    :return: the parsed object, after its ``_parse_text`` hook has run.
    """
    size = os.path.getsize(filename)
    # The file is only ever read, so open and map it read-only; asking
    # for write access fails on files the user may not modify.
    f = open(filename, 'rb')
    try:
      mapped = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
      try:
        shafile = cls._parse_file(mapped)
      finally:
        # Both parsers copy what they need out of the mapping, so it can
        # be released as soon as parsing is over.
        mapped.close()
    finally:
      f.close()
    shafile._parse_text()
    return shafile

  def _header(self):
    """Return the ``<type> <length>NUL`` header that sha() hashes first."""
    return "%s %lu\0" % (self._type, len(self._contents))

  def contents(self):
    """The raw bytes of this object"""
    return self._contents

  def sha(self):
    """The SHA1 object that is the name of this object.

    Returns the hash object itself, fed with the header and the text; the
    caller picks the digest form it wants.
    """
    # NOTE(review): ``sha`` is the module imported at the top of the file;
    # it no longer exists in Python 3, where hashlib.sha1 replaces it.
    ressha = sha.new()
    ressha.update(self._header())
    ressha.update(self._text)
    return ressha
146
147
class Blob(ShaFile):
  """A Git Blob object."""

  _type = blob_id

  def text(self):
    """The text contained within the blob object."""
    return self._text

  @classmethod
  def from_file(cls, filename):
    """Load ``filename`` and check that it holds a blob.

    :raises AssertionError: if the object found has a different type.
    """
    blob = ShaFile.from_file(filename)
    assert blob._type == cls._type, "%s is not a blob object" % filename
    return blob

  @classmethod
  def from_string(cls, string):
    """Create a blob from a string."""
    blob = cls()
    blob._text = string
    blob._update_contents()
    return blob
169
170
class Tree(ShaFile):
  """A Git tree object"""

  _type = tree_id

  @classmethod
  def from_file(cls, filename):
    """Load ``filename`` and check that it holds a tree.

    :raises AssertionError: if the object found has a different type.
    """
    tree = ShaFile.from_file(filename)
    assert tree._type == cls._type, "%s is not a tree object" % filename
    return tree

  def entries(self):
    """Return a list of tuples describing the tree entries

    Each tuple is ``(mode, name, hexsha)`` as built by ``_parse_text``.
    """
    return self._entries

  def _parse_text(self):
    """Grab the entries in the tree

    Every entry reads ``<octal mode> <name>NUL<20 character sha>``.  The
    mode is converted to an integer and the sha to its hexadecimal form.

    :raises AssertionError: on a mode character outside 0-7, or (through
        sha_to_hex) on a sha that is not 20 characters long.
    """
    text = self._text
    self._entries = []
    count = 0
    while count < len(text):
      # Octal mode, terminated by a space.
      mode = 0
      char = text[count]
      while char != ' ':
        assert char >= '0' and char <= '7', "%s is not a valid mode char" % char
        mode = (mode << 3) + (ord(char) - ord('0'))
        count += 1
        char = text[count]
      count += 1
      # Entry name, terminated by NUL; located first, then sliced once.
      name_start = count
      while text[count] != '\0':
        count += 1
      name = text[name_start:count]
      count += 1
      # The 20 raw sha characters follow directly.
      raw_sha = text[count:count+20]
      hexsha = sha_to_hex(raw_sha)
      self._entries.append((mode, name, hexsha))
      count = count + 20
210
211
class Commit(ShaFile):
  """A git commit object"""

  _type = commit_id

  @classmethod
  def from_file(cls, filename):
    """Load ``filename`` and check that it holds a commit.

    :raises AssertionError: if the object found has a different type.
    """
    commit = ShaFile.from_file(filename)
    assert commit._type == cls._type, "%s is not a commit object" % filename
    return commit

  def _parse_text(self):
    """Split the commit text into tree, parents, author, committer, message.

    The layout accepted is, in this order: one ``tree <40 chars>`` line,
    any number of ``parent <40 chars>`` lines, an optional ``author`` line,
    an optional ``committer`` line, an empty line, and then the message,
    which runs to the end of the text.

    :raises AssertionError: if a separator is not where it is expected.
    """
    text = self._text
    count = 0
    assert text.startswith(tree_id), "Invlid commit object, " \
         "must start with %s" % tree_id
    count += len(tree_id)
    assert text[count] == ' ', "Invalid commit object, " \
         "%s must be followed by space not %s" % (tree_id, text[count])
    count += 1
    self._tree = text[count:count+40]
    count = count + 40
    assert text[count] == "\n", "Invalid commit object, " \
         "tree sha must be followed by newline"
    count += 1
    self._parents = []
    # startswith() with an offset checks in place, without copying the
    # remainder of the text for every header tested.
    while text.startswith(parent_id, count):
      count += len(parent_id)
      assert text[count] == ' ', "Invalid commit object, " \
           "%s must be followed by space not %s" % (parent_id, text[count])
      count += 1
      self._parents.append(text[count:count+40])
      count += 40
      assert text[count] == "\n", "Invalid commit object, " \
           "parent sha must be followed by newline"
      count += 1
    self._author = None
    if text.startswith(author_id, count):
      count += len(author_id)
      assert text[count] == ' ', "Invalid commit object, " \
           "%s must be followed by space not %s" % (author_id, text[count])
      count += 1
      # Find the end of the line, then take the value in one slice.
      end = count
      while text[end] != '\n':
        end += 1
      self._author = text[count:end]
      count = end + 1
    self._committer = None
    if text.startswith(committer_id, count):
      count += len(committer_id)
      assert text[count] == ' ', "Invalid commit object, " \
           "%s must be followed by space not %s" % (committer_id, text[count])
      count += 1
      end = count
      while text[end] != '\n':
        end += 1
      self._committer = text[count:end]
      count = end + 1
    assert text[count] == '\n', "There must be a new line after the headers"
    count += 1
    self._message = text[count:]

  def tree(self):
    """Returns the tree that is the state of this commit"""
    return self._tree

  def parents(self):
    """Return a list of parents of this commit."""
    return self._parents

  def author(self):
    """Returns the author line of the commit, or None if it had none"""
    return self._author

  def committer(self):
    """Returns the committer line of the commit, or None if it had none"""
    return self._committer

  def message(self):
    """Returns the commit message"""
    return self._message
292
293
# Maps the type name at the start of a legacy object to the class that
# represents it.
type_map = {
  blob_id : Blob,
  tree_id : Tree,
  commit_id : Commit,
}
298
299
# Maps the numeric type taken from the first byte of a new style object to
# the class that represents it.
num_type_map = {
  1 : Commit,
  2 : Tree,
  3 : Blob,
}
304