86
94
graph.add_node(bzr_node, bzr_parents)
97
# Compute the ancestry of ``revision_id`` expressed as bzr revision ids.
# The bzr id is converted to a git id, handed to self._git.get_ancestry in a
# one-element list, and each git id that comes back is converted to a bzr id.
# NOTE(review): the line that opens the bracketed expression closed on the
# ``for`` line below is not visible in this listing (the numbering skips
# 101) -- confirm the exact returned value against the full file.
def get_ancestry(self, revision_id):
98
param = [ids.convert_revision_id_bzr_to_git(revision_id)]
99
git_ancestry = self._git.get_ancestry(param)
100
# print "fetched ancestry:", param
102
ids.convert_revision_id_git_to_bzr(git_id)
103
for git_id in git_ancestry]
105
def get_signature_text(self, revision_id):
    """Report that no signature text exists for ``revision_id``.

    :param revision_id: Revision id whose signature text was requested.
    :raises errors.NoSuchRevision: Always; this method never returns.
    """
    raise errors.NoSuchRevision(self, revision_id)
108
def get_inventory_xml(self, revision_id):
    """See Repository.get_inventory_xml().

    :param revision_id: Revision id of the inventory to serialise.
    :return: The result of passing ``self.get_inventory(revision_id)`` to
        ``bzrlib.xml5.serializer_v5.write_inventory_to_string``.
    """
    return bzrlib.xml5.serializer_v5.write_inventory_to_string(
        self.get_inventory(revision_id))
113
def get_inventory_sha1(self, revision_id):
    """Get the sha1 for the XML representation of an inventory.

    :param revision_id: Revision id of the inventory for which to return
        the SHA1.
    :return: ``osutils.sha_string`` applied to the text produced by
        ``self.get_inventory_xml(revision_id)``.
    """
    return osutils.sha_string(self.get_inventory_xml(revision_id))
123
def get_revision_xml(self, revision_id):
    """Return the XML representation of a revision.

    :param revision_id: Revision for which to return the XML.
    :return: The result of passing ``self.get_revision(revision_id)`` to
        ``bzrlib.xml5.serializer_v5.write_revision_to_string``.
    """
    return bzrlib.xml5.serializer_v5.write_revision_to_string(
        self.get_revision(revision_id))
89
132
def get_revision(self, revision_id):
    """Return the parsed revision for ``revision_id``, memoising the result.

    A hit in ``self._revision_cache`` is returned directly.  Otherwise the
    bzr id is converted to a git commit id, the commit header is read with
    ``self._git.rev_list`` (``max_count=1, header=True``), parsed with
    ``self._parse_rev`` and stored in the cache before being returned.

    :param revision_id: bzr revision id to look up.
    :return: The cached or freshly parsed revision object.
    """
    # NOTE(review): the listing this block was recovered from interleaved
    # an older, non-caching body with the caching one kept here, and did
    # not show the final ``return`` -- confirm against the full file.
    if revision_id in self._revision_cache:
        return self._revision_cache[revision_id]
    git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
    raw = self._git.rev_list([git_commit_id], max_count=1, header=True)
    revision = self._parse_rev(raw)
    self._revision_cache[revision_id] = revision
    return revision
97
142
def has_revision(self, revision_id):
185
230
def revision_tree(self, revision_id):
    """Return a GitRevisionTree for ``revision_id`` backed by this object.

    :param revision_id: Revision id of the tree to build.
    :return: ``GitRevisionTree(self, revision_id)``.
    """
    return GitRevisionTree(self, revision_id)
233
def _get_blob(self, git_id):
    """Return the content of the git blob ``git_id``, memoising the result.

    A hit in ``self._blob_cache`` is returned directly.  On a miss the blob
    is read with ``self._git.cat_file('blob', git_id)``, stored in the
    cache and returned.

    :param git_id: Git object id of the blob.
    :return: Whatever ``cat_file`` produced for this blob.
    """
    # NOTE(review): the guard around the cache lookup and the final
    # ``return`` were not visible in the listing this block was recovered
    # from; they are reconstructed here -- confirm against the full file.
    try:
        return self._blob_cache[git_id]
    except KeyError:
        pass
    blob = self._git.cat_file('blob', git_id)
    self._blob_cache[git_id] = blob
    return blob
188
242
# Build the inventory for ``revision_id``.
# None is treated as revision.NULL_REVISION, for which an empty Inventory
# with root_id=None is produced.  Otherwise the git inventory of the
# converted commit id is parsed with self._parse_inventory, and each file id
# in inv.git_ids is then passed through self._set_entry_text_info and
# self._set_entry_revision.
# NOTE(review): this listing appears to interleave two revisions of the
# method (the early ``return self._parse_inventory(...)`` next to the later
# ``inv = self._parse_inventory(...)``), and the statements binding ``entry``
# in both loops, as well as the final return, are not visible -- confirm
# against the full file.
def get_inventory(self, revision_id):
189
243
if revision_id is None:
190
244
revision_id = revision.NULL_REVISION
191
245
if revision_id == revision.NULL_REVISION:
192
246
return inventory.Inventory(
193
247
revision_id=revision_id, root_id=None)
249
# First pass at building the inventory. We need this one to get the
250
# git ids, so we do not have to cache the entire tree text. Ideally,
251
# this should be all we need to do.
194
252
git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
195
253
git_inventory = self._git.get_inventory(git_commit)
196
return self._parse_inventory(revision_id, git_inventory)
254
# print "fetched inventory:", git_commit
255
inv = self._parse_inventory(revision_id, git_inventory)
257
# Second pass at building the inventory. There we retrieve additional
258
# data that bzrlib requires: text sizes, sha1s, symlink targets and
259
# revisions that introduced inventory entries
260
# Filled in by _set_entry_text_info, keyed by entry.file_id.
inv.git_file_data = {}
261
for file_id in sorted(inv.git_ids.iterkeys()):
262
git_id = inv.git_ids[file_id]
264
self._set_entry_text_info(inv, entry, git_id)
265
# Separate loop over the same sorted file ids to set entry revisions.
for file_id in sorted(inv.git_ids.iterkeys()):
266
git_id = inv.git_ids[file_id]
268
path = inv.id2path(file_id)
269
self._set_entry_revision(entry, revision_id, path, git_id)
199
273
# Turn a git inventory listing into a bzrlib Inventory for ``revid``.
# ``git_inv`` is iterated as (perms, git_kind, git_id, path) tuples; each
# path is added to the inventory under a file id derived from the path, and
# the git id is recorded in inv.git_ids under that file id.
# NOTE(review): the first parameter is named ``klass``, so this is presumably
# a classmethod whose decorator is outside this listing.  Many statements
# are not visible here (the numbering skips 277, 279, 283, 285-294, 303+):
# the bindings of ``kind`` and ``text_sha1``, the initialisation of
# inv.git_ids and the return.  Two revisions of the ``inv.add_path`` call
# also appear side by side.  Confirm against the full file.
def _parse_inventory(klass, revid, git_inv):
200
274
# For now, git inventory do not have root ids. It is not clear that we
201
275
# can reliably support root ids. -- David Allouche 2007-12-28
202
276
inv = inventory.Inventory(revision_id=revid)
203
278
for perms, git_kind, git_id, path in git_inv:
205
280
executable = False
206
281
if git_kind == 'blob':
208
282
if perms[1] == '0':
210
284
# The last three characters of perms are read as an octal mode; the entry
# is executable when any of the execute bits (octal 111) is set.
executable = bool(int(perms[-3:], 8) & 0111)
221
295
# XXX: Maybe the file id should be prefixed by file kind, so when
222
296
# the kind of path changes, the id changes too.
223
297
# -- David Allouche 2007-12-28.
224
entry = inv.add_path(path, kind, file_id=path.encode('utf-8'))
225
entry.text_sha1 = text_sha1
298
file_id = escape_file_id(path.encode('utf-8'))
299
entry = inv.add_path(path, kind, file_id=file_id)
226
300
entry.executable = executable
301
inv.git_ids[file_id] = git_id
302
inv.root.revision = revid
305
# Fill in the text-related fields of ``entry`` from the git blob ``git_id``:
# text_size (sum of the line lengths), text_sha1 (osutils.sha_strings of the
# lines) and, for symlinks, symlink_target (the joined lines).  The lines are
# also stored in inv.git_file_data under entry.file_id.
# NOTE(review): the statement guarded by the ``directory`` check is not
# visible in this listing (the numbering skips 307) -- presumably directories
# are skipped; confirm against the full file.
def _set_entry_text_info(self, inv, entry, git_id):
306
if entry.kind == 'directory':
308
lines = self._get_blob(git_id)
309
entry.text_size = sum(len(line) for line in lines)
310
entry.text_sha1 = osutils.sha_strings(lines)
311
if entry.kind == 'symlink':
312
entry.symlink_target = ''.join(lines)
313
inv.git_file_data[entry.file_id] = lines
315
# Ask git for a single commit (max_count=1, topo_order=True) reachable from
# ``revision_id`` and restricted to ``path``, then convert that commit id to
# a bzr revision id.
# NOTE(review): the statement binding ``line`` from ``lines`` and the final
# return are not visible in this listing (the numbering skips 319 and 322)
# -- confirm against the full file.
def _get_file_revision(self, revision_id, path):
316
lines = self._git.rev_list(
317
[ids.convert_revision_id_bzr_to_git(revision_id)],
318
max_count=1, topo_order=True, paths=[path])
320
# line[:-1] drops the last character of the line before conversion.
result = ids.convert_revision_id_git_to_bzr(line[:-1])
321
# print "fetched file revision", line[:-1], path
324
# The various versions of _set_entry_revision can be tested by pulling from
325
# the git repo of git itself. First pull up to r700, then r702 to
326
# reproduce the RevisionNotPresent errors.
328
def _set_entry_revision_unoptimized(self, entry, revid, path, git_id):
    """Set ``entry.revision`` by asking git directly, with no caching.

    This is unusably slow and will lead to recording a few unnecessary
    duplicated file texts. But it seems to be consistent enough to let
    pulls resume without causing RevisionNotPresent errors.

    :param entry: Inventory entry whose ``revision`` attribute is set.
    :param revid: Revision id passed to ``self._get_file_revision``.
    :param path: Path passed to ``self._get_file_revision``.
    :param git_id: Unused here; kept so every ``_set_entry_revision_*``
        variant shares one signature.
    """
    entry.revision = self._get_file_revision(revid, path)
334
# Set ``entry.revision`` using self._entry_revision_cache, keyed by
# (revision id, path, git_id).  The cache is consulted for ``revid`` first,
# then for each parent of the revision; self._get_file_revision is the
# fallback, and the result is stored under the (revid, path, git_id) key.
# NOTE(review): the control-flow statements after the cache hit and inside
# and after the parent loop are not visible in this listing (the numbering
# skips 343, 348 and 349) -- confirm against the full file.
def _set_entry_revision_optimized1(self, entry, revid, path, git_id):
335
# This is much faster, produces fewer unique file texts, but will
336
# cause RevisionNotPresent errors when resuming pull.
338
# Oops, this does not account for changes in executable bit. That is
339
# probably why it produces fewer unique texts.
340
cached = self._entry_revision_cache.get((revid, path, git_id))
341
if cached is not None:
342
entry.revision = cached
344
revision = self.get_revision(revid)
345
for parent_id in revision.parent_ids:
346
entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))
347
if entry_rev is not None:
350
entry_rev = self._get_file_revision(revid, path)
351
self._entry_revision_cache[(revid, path, git_id)] = entry_rev
352
entry.revision = entry_rev
354
# Variant of the cached lookup that collects the cache hits of all parents
# in ``parent_hits`` and reuses a parent's value only when every parent has
# a hit and all hits agree; self._get_file_revision is the other source of
# ``entry_rev``.  The result is cached under (revid, path, git_id).
# NOTE(review): the initialisation of ``parent_hits`` and the control-flow
# statements after the cache hit and before the fallback are not visible in
# this listing (the numbering skips 364, 366 and 373) -- confirm against the
# full file.
def _set_entry_revision_optimized2(self, entry, revid, path, git_id):
355
# This is slower than the previous one, and does not appear to have a
356
# substantially different effect. Same number of unique texts, same
357
# RevisionNotPresent error.
359
# Oops, this does not account for changes in executable bit. That is
360
# probably why it produces fewer unique texts.
361
cached = self._entry_revision_cache.get((revid, path, git_id))
362
if cached is not None:
363
entry.revision = cached
365
revision = self.get_revision(revid)
367
for parent_id in revision.parent_ids:
368
entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))
369
if entry_rev is not None:
370
parent_hits.append(entry_rev)
371
if len(parent_hits) == len(revision.parent_ids) and len(set(parent_hits)) == 1:
372
entry_rev = parent_hits[0]
374
entry_rev = self._get_file_revision(revid, path)
375
self._entry_revision_cache[(revid, path, git_id)] = entry_rev
376
entry.revision = entry_rev
378
# Keep a handle on the uncached get_inventory so that
# _get_inventory_caching can delegate to it.
_original_get_inventory = get_inventory
379
def _get_inventory_caching(self, revid):
    """Return the inventory for ``revid``, memoised in ``_inventory_cache``.

    A hit in ``self._inventory_cache`` is returned directly; otherwise the
    inventory is built with ``self._original_get_inventory`` and stored in
    the cache before being returned.

    :param revid: Revision id of the inventory to fetch.
    :return: The cached or freshly built inventory.
    """
    # NOTE(review): the final ``return`` was not visible in the listing
    # this block was recovered from -- confirm against the full file.
    if revid in self._inventory_cache:
        return self._inventory_cache[revid]
    inv = self._original_get_inventory(revid)
    self._inventory_cache[revid] = inv
    return inv
386
# Set ``entry.revision`` by comparing the entry with the entry of the same
# file id in the parent inventories.  Directories get ``revid`` directly.
# For other entries the ancestral inventories are first built through
# self.get_inventory, then the first parent entry with the same text_sha1
# and executable bit supplies the revision; failing that, the current
# revision is used.
# NOTE(review): several control-flow statements are not visible in this
# listing (the numbering skips 393, 403, 410, 411 and 426) -- the statements
# following the directory case, the cache-hit test, the parent check and the
# matching-parent assignment must be confirmed against the full file.
def _set_entry_revision_optimized3(self, entry, revid, path, git_id):
387
# Depends on _get_inventory_caching.
389
# Set the revision of directories to the current revision. It's not
390
# accurate, but we cannot compare directory contents from here.
391
if entry.kind == 'directory':
392
entry.revision = revid
394
# Build ancestral inventories by walking parents depth first. Ideally
395
# this should be done in an inter-repository, where already imported
396
# data can be used as reference.
397
# ``revid`` is reused as the loop variable below, so remember the original.
current_revid = revid
398
revision = self.get_revision(revid)
399
pending_revids = list(reversed(revision.parent_ids))
400
while pending_revids:
401
revid = pending_revids.pop()
402
if revid in self._inventory_cache:
404
# Not in cache, ensure parents are in cache first.
405
pending_revids.append(revid)
406
revision = self.get_revision(revid)
407
for parent_id in reversed(revision.parent_ids):
408
if parent_id not in self._inventory_cache:
409
pending_revids.extend(reversed(revision.parent_ids))
412
# All parents are in cache, we can now build this inventory.
413
revid = pending_revids.pop()
414
self.get_inventory(revid) # populate cache
415
# We now have all ancestral inventories in the cache. Get entries by
416
# the same file_id in parent inventories, and use the revision of the
417
# first one that has the same text_sha1 and executable bit.
418
revision = self.get_revision(current_revid)
419
for revid in revision.parent_ids:
420
inventory = self.get_inventory(revid)
421
if entry.file_id in inventory:
422
parent_entry = inventory[entry.file_id]
423
if (parent_entry.text_sha1 == entry.text_sha1
424
and parent_entry.executable == entry.executable):
425
entry.revision = parent_entry.revision
427
# If we get here, that means we found no matching parent entry, use
428
# the current revision.
429
entry.revision = current_revid
431
# Set ``entry.revision`` using self._entry_revision_cache, keyed by
# (revision id, path, git_id, entry.executable).  The cache is consulted for
# ``revid`` first, then for each parent of the revision;
# self._get_file_revision is the fallback, and the result is stored under
# the key for ``revid``.
# NOTE(review): the control-flow statements after the cache hit and inside
# and after the parent loop are not visible in this listing (the numbering
# skips 438, 444 and 445) -- confirm against the full file.
def _set_entry_revision_optimized4(self, entry, revid, path, git_id):
432
# Same as optimized1, but uses the executable bit in the cache index.
433
# That appears to have the same behaviour as the unoptimized version.
434
cached = self._entry_revision_cache.get(
435
(revid, path, git_id, entry.executable))
436
if cached is not None:
437
entry.revision = cached
439
revision = self.get_revision(revid)
440
for parent_id in revision.parent_ids:
441
entry_rev = self._entry_revision_cache.get(
442
(parent_id, path, git_id, entry.executable))
443
if entry_rev is not None:
446
entry_rev = self._get_file_revision(revid, path)
447
self._entry_revision_cache[
448
(revid, path, git_id, entry.executable)] = entry_rev
449
entry.revision = entry_rev
451
# Set ``entry.revision`` using a two-level cache:
# self._entry_revision_cache[revision id][(path, git_id, executable)].
# The entry for ``revid`` is consulted first, then the per-revision dict of
# each parent; ``all_parents_in_cache`` records whether every parent has a
# dict at all.  self._get_file_revision is the git-backed source of
# ``entry_rev``, and the result is stored under ``revid``.
# NOTE(review): several statements are not visible in this listing (the
# numbering skips 461, 466, 472, 476-477 and 479-480), including the end of
# the explanatory comment and the branches taken on a cache hit, on a
# missing parent and when all parents are cached -- confirm against the full
# file.
def _set_entry_revision_optimized5(self, entry, revid, path, git_id):
452
# Same as optimized4, but makes get_inventory non-reentrant, and uses
453
# a more structured cache.
455
# cache[revision][path, git_id, executable] -> revision
457
# If a revision is in the cache, we assume it contains entries for the
458
# whole inventory. So if all parent revisions are in the cache, but no
459
# parent entry is present, then the entry revision is the current
460
# revision. That amortizes the number of git calls for large pulls to
462
cached = self._entry_revision_cache.get(revid, {}).get(
463
(path, git_id, entry.executable))
464
if cached is not None:
465
entry.revision = cached
467
revision = self.get_revision(revid)
468
all_parents_in_cache = True
469
for parent_id in revision.parent_ids:
470
if parent_id not in self._entry_revision_cache:
471
all_parents_in_cache = False
473
entry_rev = self._entry_revision_cache[parent_id].get(
474
(path, git_id, entry.executable))
475
if entry_rev is not None:
478
if all_parents_in_cache:
481
entry_rev = self._get_file_revision(revid, path)
482
# setdefault creates the per-revision dict on first use.
self._entry_revision_cache.setdefault(
483
revid, {})[(path, git_id, entry.executable)] = entry_rev
484
entry.revision = entry_rev
486
# Select the strategy that get_inventory calls as self._set_entry_revision.
_set_entry_revision = _set_entry_revision_optimized5
487
# Disabled: would replace get_inventory with the caching wrapper.
#get_inventory = _get_inventory_caching
490
def escape_file_id(file_id):
    """Return ``file_id`` with underscores and spaces escaped.

    Every ``'_'`` becomes ``'__'`` first, then every ``' '`` becomes
    ``'_s'``, so the result contains no spaces.

    :param file_id: String to escape.
    :return: The escaped string.
    """
    # Order matters: underscores are doubled before any '_s' is introduced.
    return file_id.replace('_', '__').replace(' ', '_s')
230
493
class GitRevisionTree(revisiontree.RevisionTree):