/brz/remove-bazaar : contents of git/objects.py at revision 0.211.2

: (revision 0.211.2)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# objects.py -- Access to base git objects
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# The header parsing code is based on that from git itself, which is
# Copyright (C) 2005 Linus Torvalds
# and licensed under v2 of the GPL.
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.

import mmap
import os
import sha
import zlib

# Object type names.  Each is the _type of the matching ShaFile subclass
# and a key of type_map; tree_id is also the first header keyword that
# Commit._parse_text expects.
blob_id = "blob"
tree_id = "tree"
commit_id = "commit"
# Remaining header keywords recognised by Commit._parse_text.
parent_id = "parent"
author_id = "author"
committer_id = "committer"

def _decompress(string):
    """Inflate a zlib-compressed string and return the decompressed data.

    The decompressor is flushed so that any output it is still holding is
    included in the result.
    """
    inflater = zlib.decompressobj()
    return inflater.decompress(string) + inflater.flush()

def sha_to_hex(sha):
  """Return the 40-character lowercase hex form of a binary SHA-1.

  sha is the raw 20-byte digest.  Its items may be one-character strings
  (iterating a byte string on Python 2) or integers (iterating bytes or a
  bytearray); both are accepted.

  Raises AssertionError if the hex form is not exactly 40 characters long,
  i.e. if sha does not hold exactly 20 bytes.
  """
  digits = []
  for c in sha:
    if not isinstance(c, int):
      c = ord(c)
    # "%02x" zero-pads values below 16 to two digits.
    digits.append("%02x" % c)
  hexsha = ''.join(digits)
  assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
         len(hexsha)
  return hexsha

class ShaFile(object):
  """A git SHA file (a single object read from disk).

  Instances are normally obtained from from_file() or from a subclass
  constructor rather than by calling the class directly.  A parsed
  instance carries:

    _text      the decompressed payload that follows the object header
    _contents  the same payload as a list of integer byte values
    _size      the size declared in the header (legacy objects only)

  Subclasses provide _type (the type name used in the header) and may
  override _parse_text() to decode the payload.
  """

  def _update_contents(self):
    """Update the _contents from the _text"""
    self._contents = [ord(c) for c in self._text]

  @classmethod
  def _parse_legacy_object(cls, map):
    """Parse a legacy object, creating it and setting object._text.

    The whole of map is decompressed and must then read: a type name
    known to type_map, a space, the size in canonical decimal (no leading
    zeros), a NUL byte, and finally the payload.

    Returns a new instance of the class registered for the type name.
    Raises AssertionError if the header is malformed (IndexError if the
    data ends inside the header).
    """
    text = _decompress(map)
    obj = None
    pos = 0
    for posstype in type_map:
      if text.startswith(posstype):
        obj = type_map[posstype]()
        pos = len(posstype)
        break
    assert obj is not None, "%s is not a known object type" % text[:9]
    assert text[pos] == ' ', "%s is not a space" % text[pos]
    pos += 1
    # Walk the header with an index rather than re-slicing text for every
    # character, which copied the whole payload once per header byte.
    size = 0
    digits = 0
    while '0' <= text[pos] <= '9':
      # A zero value after at least one digit means the size began with "0".
      assert not (digits > 0 and size == 0), "Size is not in canonical format"
      size = (size * 10) + int(text[pos])
      pos += 1
      digits += 1
    obj._size = size
    assert text[pos] == "\0", "Size not followed by null"
    obj._text = text[pos + 1:]
    obj._update_contents()
    return obj

  @classmethod
  def _parse_object(cls, map):
    """Parse a new style object, creating it and setting object._text.

    Bits 4-6 of the first byte select the class via num_type_map.  Header
    bytes are then skipped for as long as the high bit of the last byte
    read is set; the size they encode is not decoded.  Everything after
    the header is decompressed into _text.

    Raises AssertionError if the type number is not in num_type_map.
    """
    used = 0
    byte = ord(map[used])
    used += 1
    num_type = (byte >> 4) & 7
    try:
      obj = num_type_map[num_type]()
    except KeyError:
      # Raised explicitly (not via "assert False") so that the check is
      # not stripped when Python runs with -O.
      raise AssertionError("Not a known type: %d" % num_type)
    while (byte & 0x80) != 0:
      byte = ord(map[used])
      used += 1
    obj._text = _decompress(map[used:])
    obj._update_contents()
    return obj

  @classmethod
  def _parse_file(cls, map):
    """Detect which object format map holds and parse it accordingly.

    The data is treated as a legacy object when its first byte is 0x78
    and its first two bytes, read as a big-endian 16-bit number, are a
    multiple of 31 (i.e. it starts like a zlib stream); otherwise it is
    parsed as a new style object.
    """
    first = ord(map[0])
    word = (first << 8) + ord(map[1])
    if first == 0x78 and (word % 31) == 0:
      return cls._parse_legacy_object(map)
    return cls._parse_object(map)

  def __init__(self):
    """Don't call this directly"""

  def _parse_text(self):
    """For subclasses to do initialisation time parsing"""

  @classmethod
  def from_file(cls, filename):
    """Get the contents of a SHA file on disk.

    Returns an instance of the class matching the type stored in the
    file, with _text set and _parse_text() already run.
    """
    size = os.path.getsize(filename)
    # The file is only read, so open and map it read-only; requesting write
    # access would fail on object files that are not writable.
    f = open(filename, 'rb')
    try:
      mapped = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
      try:
        shafile = cls._parse_file(mapped)
      finally:
        # The parsers copy what they keep, so the mapping can be released.
        mapped.close()
      shafile._parse_text()
      return shafile
    finally:
      f.close()

  def _header(self):
    """Return the object header: type name, space, payload length, NUL."""
    return "%s %lu\0" % (self._type, len(self._contents))

  def contents(self):
    """The raw bytes of this object"""
    return self._contents

  def sha(self):
    """The SHA1 object that is the name of this object.

    Returns a hash object (not a hex string) that has been fed the header
    followed by the payload; call digest() or hexdigest() on it.
    """
    # hashlib supersedes the deprecated "sha" module and returns an object
    # with the same update/digest/hexdigest interface.
    import hashlib
    ressha = hashlib.sha1()
    ressha.update(self._header())
    ressha.update(self._text)
    return ressha

class Blob(ShaFile):
  """A git blob object: an opaque run of file data."""

  _type = blob_id

  def text(self):
    """Return the data held by this blob."""
    return self._text

  @classmethod
  def from_file(cls, filename):
    """Load filename and check that the object stored there is a blob.

    Raises AssertionError if the file holds another type of object.
    """
    loaded = ShaFile.from_file(filename)
    assert loaded._type == cls._type, "%s is not a blob object" % filename
    return loaded

  @classmethod
  def from_string(cls, string):
    """Build a new blob whose data is string."""
    blob = cls()
    blob._text = string
    blob._update_contents()
    return blob

class Tree(ShaFile):
  """A Git tree object"""

  _type = tree_id

  @classmethod
  def from_file(cls, filename):
    """Load filename and check that the object stored there is a tree.

    Raises AssertionError if the file holds another type of object.
    """
    tree = ShaFile.from_file(filename)
    assert tree._type == cls._type, "%s is not a tree object" % filename
    return tree

  def entries(self):
    """Return a list of tuples describing the tree entries.

    Each tuple is (mode, name, hexsha): the mode as an integer, the entry
    name, and the 40-character hex SHA of the entry.  The list is built
    by _parse_text().
    """
    return self._entries

  def _parse_text(self):
    """Grab the entries in the tree.

    _text is read as a sequence of records, each made of an octal mode, a
    space, the entry name, a NUL byte and a 20-byte binary SHA.

    Raises AssertionError if a mode contains a non-octal character, if a
    record lacks its space or NUL delimiter, or (via sha_to_hex) if the
    SHA is shorter than 20 bytes.
    """
    text = self._text
    self._entries = []
    pos = 0
    while pos < len(text):
      space = text.find(' ', pos)
      assert space != -1, "Tree entry mode is not followed by a space"
      mode = 0
      for digit in text[pos:space]:
        assert digit >= '0' and digit <= '7', \
               "%s is not a valid mode char" % digit
        mode = (mode << 3) + (ord(digit) - ord('0'))
      nul = text.find('\0', space + 1)
      assert nul != -1, "Tree entry name is not followed by a null"
      name = text[space + 1:nul]
      # The 20 raw SHA bytes follow the NUL; a short slice here is reported
      # by sha_to_hex's length check.
      raw_sha = text[nul + 1:nul + 21]
      self._entries.append((mode, name, sha_to_hex(raw_sha)))
      pos = nul + 21

class Commit(ShaFile):
  """A git commit object"""

  _type = commit_id

  @classmethod
  def from_file(cls, filename):
    """Load filename and check that the object stored there is a commit.

    Raises AssertionError if the file holds another type of object.
    """
    commit = ShaFile.from_file(filename)
    assert commit._type == cls._type, "%s is not a commit object" % filename
    return commit

  @staticmethod
  def _parse_ident(text, count, field_id):
    """Parse an optional "<field_id> <value>" header line at text[count:].

    Returns (value, new_count).  value is None and count is returned
    unchanged when the line is absent; otherwise value is everything
    between the space and the newline, and new_count is just past the
    newline.  Raises AssertionError if the line is malformed.
    """
    # startswith with an offset avoids copying the remainder of the text.
    if not text.startswith(field_id, count):
      return None, count
    count += len(field_id)
    assert text[count] == ' ', "Invalid commit object, " \
         "%s must be followed by space not %s" % (field_id, text[count])
    count += 1
    end = text.find('\n', count)
    assert end != -1, "Invalid commit object, " \
         "%s line must be followed by newline" % field_id
    return text[count:end], end + 1

  def _parse_text(self):
    """Split _text into tree, parents, author, committer and message.

    The expected layout is: "tree <40 chars>", any number of
    "parent <40 chars>" lines, an optional author line, an optional
    committer line, an empty line, and then the message.

    Raises AssertionError if the layout is not followed (IndexError if
    the text ends where a delimiter is expected).
    """
    text = self._text
    assert text.startswith(tree_id), "Invalid commit object, " \
         "must start with %s" % tree_id
    count = len(tree_id)
    assert text[count] == ' ', "Invalid commit object, " \
         "%s must be followed by space not %s" % (tree_id, text[count])
    count += 1
    self._tree = text[count:count+40]
    count += 40
    assert text[count] == "\n", "Invalid commit object, " \
         "tree sha must be followed by newline"
    count += 1
    self._parents = []
    while text.startswith(parent_id, count):
      count += len(parent_id)
      assert text[count] == ' ', "Invalid commit object, " \
           "%s must be followed by space not %s" % (parent_id, text[count])
      count += 1
      self._parents.append(text[count:count+40])
      count += 40
      assert text[count] == "\n", "Invalid commit object, " \
           "parent sha must be followed by newline"
      count += 1
    self._author, count = self._parse_ident(text, count, author_id)
    self._committer, count = self._parse_ident(text, count, committer_id)
    assert text[count] == '\n', "There must be a new line after the headers"
    self._message = text[count + 1:]

  def tree(self):
    """Returns the tree that is the state of this commit"""
    return self._tree

  def parents(self):
    """Return a list of parents of this commit."""
    return self._parents

  def author(self):
    """Returns the text of the author line, or None if there was none"""
    return self._author

  def committer(self):
    """Returns the text of the committer line, or None if there was none"""
    return self._committer

  def message(self):
    """Returns the commit message"""
    return self._message

# Maps the type name at the start of a legacy object header to the class
# that represents it; consulted by ShaFile._parse_legacy_object.
type_map = {
  blob_id : Blob,
  tree_id : Tree,
  commit_id : Commit,
}

# Maps the 3-bit type number taken from the first byte of a new style
# object to its class; consulted by ShaFile._parse_object.
num_type_map = {
  1 : Commit,
  2 : Tree,
  3 : Blob,
}


0.211.2 by James Westby Make it more like a real project.	1	# objects.py -- Acces to base git objects
	2	# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
	3	# The header parsing code is based on that from git itself, which is
	4	# Copyright (C) 2005 Linus Torvalds
	5	# and licensed under v2 of the GPL.
	6	#
	7	# This program is free software; you can redistribute it and/or
	8	# modify it under the terms of the GNU General Public License
	9	# as published by the Free Software Foundation; version 2
	10	# of the License.
	11	#
	12	# This program is distributed in the hope that it will be useful,
	13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	# GNU General Public License for more details.
	16	#
	17	# You should have received a copy of the GNU General Public License
	18	# along with this program; if not, write to the Free Software
	19	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	20	# MA 02110-1301, USA.
	21
0.211.1 by James Westby Start the python-git project.	22	import mmap
	23	import os
	24	import sha
	25	import zlib
	26
	27	blob_id = "blob"
	28	tree_id = "tree"
	29	commit_id = "commit"
	30	parent_id = "parent"
	31	author_id = "author"
	32	committer_id = "committer"
	33
	34	def _decompress(string):
	35	dcomp = zlib.decompressobj()
	36	dcomped = dcomp.decompress(string)
	37	dcomped += dcomp.flush()
	38	return dcomped
	39
	40	def sha_to_hex(sha):
	41	"""Takes a string and returns the hex of the sha within"""
	42	hexsha = ''
	43	for c in sha:
	44	if ord(c) < 16:
	45	hexsha += "0%x" % ord(c)
	46	else:
	47	hexsha += "%x" % ord(c)
	48	assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
	49	len(hexsha)
	50	return hexsha
	51
	52	class ShaFile(object):
	53	"""A git SHA file."""
	54
	55	def _update_contents(self):
	56	"""Update the _contents from the _text"""
	57	self._contents = [ord(c) for c in self._text]
	58
	59	@classmethod
	60	def _parse_legacy_object(cls, map):
	61	"""Parse a legacy object, creating it and setting object._text"""
	62	text = _decompress(map)
	63	object = None
	64	for posstype in type_map.keys():
	65	if text.startswith(posstype):
	66	object = type_map[posstype]()
	67	text = text[len(posstype):]
	68	break
	69	assert object is not None, "%s is not a known object type" % text[:9]
	70	assert text[0] == ' ', "%s is not a space" % text[0]
	71	text = text[1:]
	72	size = 0
	73	i = 0
	74	while text[0] >= '0' and text[0] <= '9':
	75	if i > 0 and size == 0:
	76	assert False, "Size is not in canonical format"
	77	size = (size * 10) + int(text[0])
	78	text = text[1:]
	79	i += 1
	80	object._size = size
	81	assert text[0] == "\0", "Size not followed by null"
	82	text = text[1:]
	83	object._text = text
	84	object._update_contents()
	85	return object
86
87	@classmethod
88	def _parse_object(cls, map):
89	"""Parse a new style object , creating it and setting object._text"""
90	used = 0
91	byte = ord(map[used])
92	used += 1
93	num_type = (byte >> 4) & 7
94	try:
95	object = num_type_map[num_type]()
96	except KeyError:
97	assert False, "Not a known type: %d" % num_type
98	while((byte & 0x80) != 0):
99	byte = ord(map[used])
100	used += 1
101	raw = map[used:]
102	object._text = _decompress(raw)
103	object._update_contents()
104	return object
105
106	@classmethod
107	def _parse_file(cls, map):
108	word = (ord(map[0]) << 8) + ord(map[1])
109	if ord(map[0]) == 0x78 and (word % 31) == 0:
110	return cls._parse_legacy_object(map)
111	else:
112	return cls._parse_object(map)
113
114	def __init__(self):
115	"""Don't call this directly"""
116
117	def _parse_text(self):
118	"""For subclasses to do initialistion time parsing"""
119
120	@classmethod
121	def from_file(cls, filename):
122	"""Get the contents of a SHA file on disk"""
123	size = os.path.getsize(filename)
124	f = open(filename, 'rb+')
125	try:
126	map = mmap.mmap(f.fileno(), size)
127	shafile = cls._parse_file(map)
128	shafile._parse_text()
129	return shafile
130	finally:
131	f.close()
132
133	def _header(self):
134	return "%s %lu\0" % (self._type, len(self._contents))
135
136	def contents(self):
137	"""The raw bytes of this object"""
138	return self._contents
139
140	def sha(self):
141	"""The SHA1 object that is the name of this object."""
142	ressha = sha.new()
143	ressha.update(self._header())
144	ressha.update(self._text)
145	return ressha
146
147	class Blob(ShaFile):
148	"""A Git Blob object."""
149
150	_type = blob_id
151
152	def text(self):
153	"""The text contained within the blob object."""
154	return self._text
155
156	@classmethod
157	def from_file(cls, filename):
158	blob = ShaFile.from_file(filename)
159	assert blob._type == cls._type, "%s is not a blob object" % filename
160	return blob
161
162	@classmethod
163	def from_string(cls, string):
164	"""Create a blob from a string."""
165	shafile = cls()
166	shafile._text = string
167	shafile._update_contents()
168	return shafile
169
170	class Tree(ShaFile):
171	"""A Git tree object"""
172
173	_type = tree_id
174
175	@classmethod
176	def from_file(cls, filename):
177	tree = ShaFile.from_file(filename)
178	assert tree._type == cls._type, "%s is not a tree object" % filename
179	return tree
180
181	def entries(self):
182	"""Reutrn a list of tuples describing the tree entries"""
183	return self._entries
184
185	def _parse_text(self):
186	"""Grab the entries in the tree"""
187	self._entries = []
188	count = 0
189	while count < len(self._text):
190	mode = 0
191	chr = self._text[count]
192	while chr != ' ':
193	assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
194	mode = (mode << 3) + (ord(chr) - ord('0'))
195	count += 1
196	chr = self._text[count]
197	count += 1
198	chr = self._text[count]
199	name = ''
200	while chr != '\0':
201	name += chr
202	count += 1
203	chr = self._text[count]
204	count += 1
205	chr = self._text[count]
206	sha = self._text[count:count+20]
207	hexsha = sha_to_hex(sha)
208	self._entries.append((mode, name, hexsha))
209	count = count + 20
210
211	class Commit(ShaFile):
212	"""A git commit object"""
213
214	_type = commit_id
215
216	@classmethod
217	def from_file(cls, filename):
218	commit = ShaFile.from_file(filename)
219	assert commit._type == cls._type, "%s is not a commit object" % filename
220	return commit
221
222	def _parse_text(self):
223	text = self._text
224	count = 0
225	assert text.startswith(tree_id), "Invlid commit object, " \
226	"must start with %s" % tree_id
227	count += len(tree_id)
228	assert text[count] == ' ', "Invalid commit object, " \
229	"%s must be followed by space not %s" % (tree_id, text[count])
230	count += 1
231	self._tree = text[count:count+40]
232	count = count + 40
233	assert text[count] == "\n", "Invalid commit object, " \
234	"tree sha must be followed by newline"
235	count += 1
236	self._parents = []
237	while text[count:].startswith(parent_id):
238	count += len(parent_id)
239	assert text[count] == ' ', "Invalid commit object, " \
240	"%s must be followed by space not %s" % (parent_id, text[count])
241	count += 1
242	self._parents.append(text[count:count+40])
243	count += 40
244	assert text[count] == "\n", "Invalid commit object, " \
245	"parent sha must be followed by newline"
246	count += 1
247	self._author = None
248	if text[count:].startswith(author_id):
249	count += len(author_id)
250	assert text[count] == ' ', "Invalid commit object, " \
251	"%s must be followed by space not %s" % (author_id, text[count])
252	count += 1
253	self._author = ''
254	while text[count] != '\n':
255	self._author += text[count]
256	count += 1
257	count += 1
258	self._committer = None
259	if text[count:].startswith(committer_id):
260	count += len(committer_id)
261	assert text[count] == ' ', "Invalid commit object, " \
262	"%s must be followed by space not %s" % (committer_id, text[count])
263	count += 1
264	self._committer = ''
265	while text[count] != '\n':
266	self._committer += text[count]
267	count += 1
268	count += 1
269	assert text[count] == '\n', "There must be a new line after the headers"
270	count += 1
271	self._message = text[count:]
272
273	def tree(self):
274	"""Returns the tree that is the state of this commit"""
275	return self._tree
276
277	def parents(self):
278	"""Return a list of parents of this commit."""
279	return self._parents
280
281	def author(self):
282	"""Returns the name of the author of the commit"""
283	return self._author
284
285	def committer(self):
286	"""Returns the name of the committer of the commit"""
287	return self._committer
288
289	def message(self):
290	"""Returns the commit message"""
291	return self._message
292
293	type_map = {
294	blob_id : Blob,
295	tree_id : Tree,
296	commit_id : Commit,
297	}
298
299	num_type_map = {
300	1 : Commit,
301	2 : Tree,
302	3 : Blob,
303	}
304