# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
"""An adapter between a Git Repository and a Bazaar Branch"""
30
from bzrlib.plugins.git import (
36
class GitRepository(repository.Repository):
37
"""An adapter to git repositories for bzr."""
39
# To make bzrlib happy
42
def __init__(self, gitdir, lockfiles):
    """Wrap the git repository located at ``gitdir``.

    :param gitdir: control directory object; its ``transport`` must be
        local, because the git model is opened from
        ``transport.local_abspath('.')``.
    :param lockfiles: object stored as ``control_files``.
    """
    # NOTE(review): bzrlib repositories conventionally expose their
    # control directory as ``bzrdir`` -- confirm against the base class.
    self.bzrdir = gitdir
    self.control_files = lockfiles
    gitdirectory = gitdir.transport.local_abspath('.')
    self._git = model.GitModel(gitdirectory)
    # Per-instance caches; none of them is ever evicted.
    self._revision_cache = {}
    # Read and filled by _get_blob(); must exist before its first call.
    self._blob_cache = {}
    self._entry_revision_cache = {}
    self._inventory_cache = {}
52
def _ancestor_revisions(self, revision_ids):
53
if revision_ids is not None:
54
git_revisions = [gitrevid_from_bzr(r) for r in revision_ids]
57
for lines in self._git.ancestor_lines(git_revisions):
58
yield self._parse_rev(lines)
59
# print "fetched ancestors:", git_revisions
64
def supports_rich_root(self):
    """Tell bzrlib that this repository does not support rich roots.

    NOTE(review): the return value is reconstructed; it matches
    get_inventory(), which builds the empty inventory with root_id=None.
    """
    return False
67
def get_revision_graph(self, revision_id=None):
    """Return the revision graph as a dict of bzr revision ids.

    :param revision_id: bzr revision id to start from, or None. None is
        passed through unchanged to ``self._git.get_revision_graph``.
    :return: dict mapping each bzr revision id to the list of its parent
        bzr revision ids.
    """
    if revision_id is not None:
        param = [ids.convert_revision_id_bzr_to_git(revision_id)]
    else:
        param = None
    git_graph = self._git.get_revision_graph(param)
    result = {}
    for node, parents in git_graph.iteritems():
        bzr_node = ids.convert_revision_id_git_to_bzr(node)
        result[bzr_node] = [ids.convert_revision_id_git_to_bzr(n)
                            for n in parents]
    return result
82
def get_revision_graph_with_ghosts(self, revision_ids=None):
    """Return the revision graph as a ``deprecated_graph.Graph``.

    :param revision_ids: iterable of bzr revision ids, or None. None is
        passed through unchanged to ``self._git.get_revision_graph``.
    :return: a ``deprecated_graph.Graph`` with one node per revision,
        added with its list of parent bzr revision ids.
    """
    graph = deprecated_graph.Graph()
    if revision_ids is not None:
        revision_ids = [ids.convert_revision_id_bzr_to_git(r)
                        for r in revision_ids]
    git_graph = self._git.get_revision_graph(revision_ids)
    for node, parents in git_graph.iteritems():
        bzr_node = ids.convert_revision_id_git_to_bzr(node)
        bzr_parents = [ids.convert_revision_id_git_to_bzr(n)
                       for n in parents]
        graph.add_node(bzr_node, bzr_parents)
    return graph
97
def get_ancestry(self, revision_id):
    """Return the ancestry of ``revision_id`` as bzr revision ids.

    :param revision_id: bzr revision id whose ancestry is requested.
    :return: list of bzr revision ids converted from
        ``self._git.get_ancestry``, preceded by None.
    """
    param = [ids.convert_revision_id_bzr_to_git(revision_id)]
    git_ancestry = self._git.get_ancestry(param)
    # NOTE(review): the leading None follows the bzrlib get_ancestry()
    # convention as far as we can tell -- confirm against the base class.
    return [None] + [
        ids.convert_revision_id_git_to_bzr(git_id)
        for git_id in git_ancestry]
105
def get_signature_text(self, revision_id):
    """Always fail: this adapter provides no signature texts.

    :raises errors.NoSuchRevision: unconditionally.
    """
    raise errors.NoSuchRevision(self, revision_id)
108
def get_inventory_xml(self, revision_id):
    """See Repository.get_inventory_xml().

    :param revision_id: bzr revision id of the inventory to serialise.
    :return: the inventory from get_inventory(), serialised with the
        xml5 serializer.
    """
    inv = self.get_inventory(revision_id)
    return bzrlib.xml5.serializer_v5.write_inventory_to_string(inv)
113
def get_inventory_sha1(self, revision_id):
    """Get the sha1 for the XML representation of an inventory.

    :param revision_id: Revision id of the inventory for which to return
        the SHA1.
    :return: result of ``osutils.sha_string`` applied to the text
        produced by get_inventory_xml().
    """
    return osutils.sha_string(self.get_inventory_xml(revision_id))
123
def get_revision_xml(self, revision_id):
    """Return the XML representation of a revision.

    :param revision_id: Revision for which to return the XML.
    :return: the revision from get_revision(), serialised with the xml5
        serializer.
    """
    rev = self.get_revision(revision_id)
    return bzrlib.xml5.serializer_v5.write_revision_to_string(rev)
132
def get_revision(self, revision_id):
    """Return the revision object for ``revision_id``, with caching.

    :param revision_id: bzr revision id.
    :return: the object built by ``self._parse_rev`` from the output of
        ``self._git.rev_list``; it is stored in ``self._revision_cache``
        and returned directly on later calls.
    """
    try:
        return self._revision_cache[revision_id]
    except KeyError:
        pass
    git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
    raw = self._git.rev_list([git_commit_id], max_count=1, header=True)
    # Named ``rev`` so the ``revision`` module is not shadowed.
    rev = self._parse_rev(raw)
    self._revision_cache[revision_id] = rev
    return rev
142
def has_revision(self, revision_id):
    """Tell whether ``revision_id`` can be loaded with get_revision().

    :param revision_id: bzr revision id.
    :return: False when get_revision() raises ``errors.NoSuchRevision``,
        True when it returns. Any other exception propagates.
    """
    try:
        self.get_revision(revision_id)
    # The exception class lives in the ``errors`` module, as used by
    # get_signature_text(); the bare name is not defined in this file.
    except errors.NoSuchRevision:
        return False
    else:
        return True
150
def get_revisions(self, revisions):
    """Return the revision objects for several revision ids.

    :param revisions: iterable of bzr revision ids.
    :return: list of get_revision() results, in the same order.
    """
    return [self.get_revision(r) for r in revisions]
154
def _parse_rev(klass, raw):
    """Parse a single git revision.

    * The first line is the git commit id.
    * Following lines conform to the 'name value' structure, until the
      first blank line.
    * All lines after the first blank line and until the NULL line have 4
      leading spaces and constitute the commit message.

    Recognised header names are 'parent', 'author', 'committer' and
    'tree'; other header lines are ignored. The committer line takes
    precedence over the author line for ``committer``, ``timestamp`` and
    ``timezone``, whichever order they appear in.

    NOTE(review): ``klass`` implies this is used as a classmethod; the
    decorator line is not part of this block.

    :param raw: sequence of newline-terminated strings, its last item is a
        single NULL character.
    :return: a `bzrlib.revision.Revision` object.
    :raises AssertionError: if the last item is not a NULL character or a
        message line lacks the 4-space prefix.
    """
    # Explicit raises rather than ``assert`` so the checks survive
    # ``python -O``; the exception type is unchanged.
    if raw[-1] != '\x00':
        raise AssertionError(
            "Last item of raw was not a single NULL character.")
    revision_id = ids.convert_revision_id_git_to_bzr(raw[0][:-1])
    rev = revision.Revision(revision_id)
    rev.inventory_sha1 = ""
    message_lines = []
    in_message = False
    committer_was_set = False
    for line in raw[1:-1]:
        if in_message:
            if line[:4] != '    ':
                raise AssertionError(
                    "Unexpected line format in commit message: %r" % line)
            message_lines.append(line[4:])
            continue
        if line == '\n':
            # The first blank line separates headers from the message.
            in_message = True
            continue
        name, value = line[:-1].split(' ', 1)
        if name == 'parent':
            rev.parent_ids.append(
                ids.convert_revision_id_git_to_bzr(value))
        elif name == 'author':
            author, timestamp, timezone = value.rsplit(' ', 2)
            rev.properties['author'] = author
            rev.properties['git-author-timestamp'] = timestamp
            rev.properties['git-author-timezone'] = timezone
            # Author data is only a fallback for the committer fields.
            if not committer_was_set:
                rev.committer = author
                rev.timestamp = float(timestamp)
                rev.timezone = klass._parse_tz(timezone)
        elif name == 'committer':
            committer_was_set = True
            committer, timestamp, timezone = value.rsplit(' ', 2)
            rev.committer = committer
            rev.timestamp = float(timestamp)
            rev.timezone = klass._parse_tz(timezone)
        elif name == 'tree':
            rev.properties['git-tree-id'] = value

    rev.message = ''.join(message_lines)
    return rev
215
def _parse_tz(klass, tz):
216
"""Parse a timezone specification in the [+|-]HHMM format.
218
:return: the timezone offset in seconds.
221
sign = {'+': +1, '-': -1}[tz[0]]
223
minutes = int(tz[3:])
224
return sign * 60 * (60 * hours + minutes)
226
def revision_trees(self, revids):
    """Lazily yield a revision tree for each id in ``revids``.

    :param revids: iterable of bzr revision ids.
    :return: generator of revision_tree() results, in the same order.
    """
    for revid in revids:
        yield self.revision_tree(revid)
230
def revision_tree(self, revision_id):
    """Return a GitRevisionTree of this repository at ``revision_id``.

    :param revision_id: bzr revision id, passed on to GitRevisionTree.
    """
    return GitRevisionTree(self, revision_id)
233
def _get_blob(self, git_id):
235
return self._blob_cache[git_id]
237
blob = self._git.cat_file('blob', git_id)
238
# print "fetched blob:", git_id
239
self._blob_cache[git_id] = blob
242
def get_inventory(self, revision_id):
    """Build the bzr inventory for ``revision_id``.

    :param revision_id: bzr revision id; None is treated as
        ``revision.NULL_REVISION``.
    :return: an ``inventory.Inventory``. For the null revision it is an
        empty inventory with ``root_id=None``. Otherwise it also carries
        ``git_ids`` (set by _parse_inventory) and ``git_file_data``
        (filled by _set_entry_text_info).
    """
    if revision_id is None:
        revision_id = revision.NULL_REVISION
    if revision_id == revision.NULL_REVISION:
        return inventory.Inventory(
            revision_id=revision_id, root_id=None)

    # First pass at building the inventory. We need this one to get the
    # git ids, so we do not have to cache the entire tree text. Ideally,
    # this should be all we need to do.
    git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
    git_inventory = self._git.get_inventory(git_commit)
    inv = self._parse_inventory(revision_id, git_inventory)

    # Second pass at building the inventory. There we retrieve additional
    # data that bzrlib requires: text sizes, sha1s, symlink targets and
    # revisions that introduced inventory entries.
    inv.git_file_data = {}
    file_ids = sorted(inv.git_ids)
    # Text info is set on every entry before any entry revision is
    # computed; the two loops are kept separate to preserve that order.
    for file_id in file_ids:
        self._set_entry_text_info(inv, inv[file_id], inv.git_ids[file_id])
    for file_id in file_ids:
        git_id = inv.git_ids[file_id]
        entry = inv[file_id]
        path = inv.id2path(file_id)
        self._set_entry_revision(entry, revision_id, path, git_id)
    return inv
273
def _parse_inventory(klass, revid, git_inv):
    """Build an inventory from git tree listing entries.

    NOTE(review): ``klass`` is unused here; it implies this is used as a
    classmethod, whose decorator line is not part of this block.

    :param revid: bzr revision id recorded on the inventory and its root.
    :param git_inv: iterable of ``(perms, git_kind, git_id, path)``
        tuples. ``perms`` is a string of octal digits; ``path`` must
        support ``.encode('utf-8')``.
    :return: an ``inventory.Inventory`` with an extra ``git_ids`` dict
        mapping file ids to git object ids.
    :raises AssertionError: on an unknown blob mode or entry kind.
    """
    import stat

    # Any of the user/group/other execute bits (octal 0111).
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

    # For now, git inventory do not have root ids. It is not clear that we
    # can reliably support root ids. -- David Allouche 2007-12-28
    inv = inventory.Inventory(revision_id=revid)
    inv.git_ids = {}
    for perms, git_kind, git_id, path in git_inv:
        executable = False
        if git_kind == 'blob':
            # The second mode digit tells regular files ('0') from
            # symlinks ('2').
            if perms[1] == '0':
                kind = 'file'
                executable = bool(int(perms[-3:], 8) & exec_bits)
            elif perms[1] == '2':
                kind = 'symlink'
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (perms,))
        elif git_kind == 'tree':
            kind = 'directory'
        else:
            raise AssertionError(
                "Unknown git entry kind: %r" % (git_kind,))
        # XXX: Maybe the file id should be prefixed by file kind, so when
        # the kind of path changes, the id changes too.
        # -- David Allouche 2007-12-28.
        file_id = escape_file_id(path.encode('utf-8'))
        entry = inv.add_path(path, kind, file_id=file_id)
        entry.executable = executable
        inv.git_ids[file_id] = git_id
    inv.root.revision = revid
    return inv
305
def _set_entry_text_info(self, inv, entry, git_id):
306
if entry.kind == 'directory':
308
lines = self._get_blob(git_id)
309
entry.text_size = sum(len(line) for line in lines)
310
entry.text_sha1 = osutils.sha_strings(lines)
311
if entry.kind == 'symlink':
312
entry.symlink_target = ''.join(lines)
313
inv.git_file_data[entry.file_id] = lines
315
def _get_file_revision(self, revision_id, path):
    """Ask git for the latest revision touching ``path``.

    :param revision_id: bzr revision id to start the search from.
    :param path: path used to restrict ``rev_list``.
    :return: bzr revision id converted from the single line returned by
        ``self._git.rev_list`` (its last character is dropped first).
    :raises ValueError: if ``rev_list`` does not yield exactly one line.
    """
    lines = self._git.rev_list(
        [ids.convert_revision_id_bzr_to_git(revision_id)],
        max_count=1, topo_order=True, paths=[path])
    # Exactly one line is expected; unpacking fails loudly otherwise.
    [line] = lines
    return ids.convert_revision_id_git_to_bzr(line[:-1])
324
# The various versions of _set_entry_revision can be tested by pulling
# from the git repository of git itself. First pull up to r700, then
# r702, to reproduce the RevisionNotPresent errors.
328
def _set_entry_revision_unoptimized(self, entry, revid, path, git_id):
329
# This is unusably slow and will lead to recording a few unnecessary
330
# duplicated file texts. But it seems to be consistent enough to let
331
# pulls resume without causing RevisionNotPresent errors.
332
entry.revision = self._get_file_revision(revid, path)
334
def _set_entry_revision_optimized1(self, entry, revid, path, git_id):
335
# This is much faster, produces fewer unique file texts, but will
336
# cause RevisionNotPresent errors when resuming pull.
338
# Oops, this does not account for changes in executable bit. That is
339
# probably why it produces fewer unique texts.
340
cached = self._entry_revision_cache.get((revid, path, git_id))
341
if cached is not None:
342
entry.revision = cached
344
revision = self.get_revision(revid)
345
for parent_id in revision.parent_ids:
346
entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))
347
if entry_rev is not None:
350
entry_rev = self._get_file_revision(revid, path)
351
self._entry_revision_cache[(revid, path, git_id)] = entry_rev
352
entry.revision = entry_rev
354
def _set_entry_revision_optimized2(self, entry, revid, path, git_id):
355
# This is slower than the previous one, and does not appear to have a
356
# subtantially different effect. Same number of unique texts, same
357
# RevisionNotPresent error.
359
# Oops, this does not account for changes in executable bit. That is
360
# probably why it produces fewer unique texts.
361
cached = self._entry_revision_cache.get((revid, path, git_id))
362
if cached is not None:
363
entry.revision = cached
365
revision = self.get_revision(revid)
367
for parent_id in revision.parent_ids:
368
entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))
369
if entry_rev is not None:
370
parent_hits.append(entry_rev)
371
if len(parent_hits) == len(revision.parent_ids) and len(set(parent_hits)) == 1:
372
entry_rev = parent_hits[0]
374
entry_rev = self._get_file_revision(revid, path)
375
self._entry_revision_cache[(revid, path, git_id)] = entry_rev
376
entry.revision = entry_rev
378
_original_get_inventory = get_inventory
379
def _get_inventory_caching(self, revid):
380
if revid in self._inventory_cache:
381
return self._inventory_cache[revid]
382
inv = self._original_get_inventory(revid)
383
self._inventory_cache[revid] = inv
386
def _set_entry_revision_optimized3(self, entry, revid, path, git_id):
387
# Depends on _get_inventory_caching.
389
# Set the revision of directories to the current revision. It's not
390
# accurate, but we cannot compare directory contents from here.
391
if entry.kind == 'directory':
392
entry.revision = revid
394
# Build ancestral inventories by walking parents depth first. Ideally
395
# this should be done in an inter-repository, where already imported
396
# data can be used as reference.
397
current_revid = revid
398
revision = self.get_revision(revid)
399
pending_revids = list(reversed(revision.parent_ids))
400
while pending_revids:
401
revid = pending_revids.pop()
402
if revid in self._inventory_cache:
404
# Not in cache, ensure parents are in cache first.
405
pending_revids.append(revid)
406
revision = self.get_revision(revid)
407
for parent_id in reversed(revision.parent_ids):
408
if parent_id not in self._inventory_cache:
409
pending_revids.extend(reversed(revision.parent_ids))
412
# All parents are in cache, we can now build this inventory.
413
revid = pending_revids.pop()
414
self.get_inventory(revid) # populate cache
415
# We now have all ancestral inventories in the cache. Get entries by
416
# the same file_id in parent inventories, and use the revision of the
417
# first one that has the same text_sha1 and executable bit.
418
revision = self.get_revision(current_revid)
419
for revid in revision.parent_ids:
420
inventory = self.get_inventory(revid)
421
if entry.file_id in inventory:
422
parent_entry = inventory[entry.file_id]
423
if (parent_entry.text_sha1 == entry.text_sha1
424
and parent_entry.executable == entry.executable):
425
entry.revision = parent_entry.revision
427
# If we get here, that means we found no matching parent entry, use
428
# the current revision.
429
entry.revision = current_revid
431
def _set_entry_revision_optimized4(self, entry, revid, path, git_id):
432
# Same as optimized1, but uses the executable bit in the cache index.
433
# That appears to have the same behaviour as the unoptimized version.
434
cached = self._entry_revision_cache.get(
435
(revid, path, git_id, entry.executable))
436
if cached is not None:
437
entry.revision = cached
439
revision = self.get_revision(revid)
440
for parent_id in revision.parent_ids:
441
entry_rev = self._entry_revision_cache.get(
442
(parent_id, path, git_id, entry.executable))
443
if entry_rev is not None:
446
entry_rev = self._get_file_revision(revid, path)
447
self._entry_revision_cache[
448
(revid, path, git_id, entry.executable)] = entry_rev
449
entry.revision = entry_rev
451
def _set_entry_revision_optimized5(self, entry, revid, path, git_id):
452
# Same as optimized4, but makes get_inventory non-reentrant, and uses
453
# a more structured cache.
455
# cache[revision][path, git_id, executable] -> revision
457
# If a revision is in the cache, we assume it contains entries for the
458
# whole inventory. So if all parent revisions are in the cache, but no
459
# parent entry is present, then the entry revision is the current
460
# revision. That amortizes the number of git calls for large pulls to
462
cached = self._entry_revision_cache.get(revid, {}).get(
463
(path, git_id, entry.executable))
464
if cached is not None:
465
entry.revision = cached
467
revision = self.get_revision(revid)
468
all_parents_in_cache = True
469
for parent_id in revision.parent_ids:
470
if parent_id not in self._entry_revision_cache:
471
all_parents_in_cache = False
473
entry_rev = self._entry_revision_cache[parent_id].get(
474
(path, git_id, entry.executable))
475
if entry_rev is not None:
478
if all_parents_in_cache:
481
entry_rev = self._get_file_revision(revid, path)
482
self._entry_revision_cache.setdefault(
483
revid, {})[(path, git_id, entry.executable)] = entry_rev
484
entry.revision = entry_rev
486
_set_entry_revision = _set_entry_revision_optimized5
487
#get_inventory = _get_inventory_caching
490
def escape_file_id(file_id):
    """Escape a path so it can be used as a file id.

    Underscores are doubled and each space becomes ``_s``. Underscores
    are escaped first so the two escape sequences stay distinguishable.

    :param file_id: string to escape.
    :return: the escaped string.
    """
    return file_id.replace('_', '__').replace(' ', '_s')
493
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree backed by an inventory built by a GitRepository."""

    def __init__(self, repository, revision_id):
        """Load the inventory of ``revision_id`` from ``repository``.

        :param repository: object providing ``get_inventory``.
        :param revision_id: bzr revision id; None is treated as
            ``revision.NULL_REVISION``.
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        self._inventory = repository.get_inventory(revision_id)
        self._repository = repository
        self._revision_id = revision_id

    def get_file_lines(self, file_id):
        """Return the content lines recorded for ``file_id``.

        :param file_id: file id present in the tree's inventory.
        :return: an empty list for directories, otherwise the lines
            stored in the inventory's ``git_file_data``.
        """
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        # The texts were already fetched while the inventory was built, so
        # no further git call is needed. The old fallback that re-read the
        # blob sat after this return and could never run; it was dropped.
        return self._inventory.git_file_data[file_id]