80
# Size limit constant: 50 * 1024 * 1024 (= 52428800).
# NOTE(review): judging by the name this bounds the inventory cache, and is
# presumably expressed in the same unit as the approx_inv_size() estimate;
# the code that consumes it is not visible here -- confirm.
MAX_INV_CACHE_SIZE = 50 * 1024 * 1024
83
def import_git_blob(texts, mapping, path, hexsha, base_inv, base_ie, parent_id,
84
revision_id, parent_invs, shagitmap, lookup_object, executable, symlink):
80
def import_git_blob(texts, mapping, path, hexsha, base_inv, base_inv_shamap,
81
base_ie, parent_id, revision_id, parent_invs, lookup_object,
85
83
"""Import a git blob object into a bzr repository.
87
85
:param texts: VersionedFiles to add to
150
148
ie.revision = revision_id
151
149
assert file_id is not None
152
150
assert ie.revision is not None
153
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, blob.data)])
154
shamap = [(hexsha, "blob", (ie.file_id, ie.revision))]
151
if ie.kind == 'symlink':
155
chunks = blob.chunked
156
except AttributeError: # older version of dulwich
158
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), tuple(parent_keys), ie.text_sha1, chunks)])
159
shamap = { ie.file_id: hexsha }
158
161
if base_ie is not None:
159
162
old_path = base_inv.id2path(file_id)
173
176
def import_git_submodule(texts, mapping, path, hexsha, base_inv, base_ie,
174
parent_id, revision_id, parent_invs, shagitmap, lookup_object):
177
parent_id, revision_id, parent_invs, lookup_object):
175
178
file_id = mapping.generate_file_id(path)
176
179
ie = TreeReference(file_id, urlutils.basename(path.decode("utf-8")),
184
187
base_ie.reference_revision == ie.reference_revision):
185
188
ie.revision = base_ie.revision
186
189
ie.reference_revision = mapping.revision_id_foreign_to_bzr(hexsha)
187
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), (), None, "")])
190
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), (), None, [])])
188
191
invdelta = [(oldpath, path, file_id, ie)]
189
192
return invdelta, {}, {}
204
def import_git_tree(texts, mapping, path, hexsha, base_inv, base_ie, parent_id,
205
revision_id, parent_invs, shagitmap, lookup_object, allow_submodules=False):
207
def import_git_tree(texts, mapping, path, hexsha, base_inv, base_inv_shamap,
208
base_ie, parent_id, revision_id, parent_invs, lookup_object,
209
allow_submodules=False):
206
210
"""Import a git tree object into a bzr repository.
208
212
:param texts: VersionedFiles object to add to
219
223
if base_ie is None:
220
224
# Newly appeared here
221
225
ie.revision = revision_id
222
texts.insert_record_stream([FulltextContentFactory((file_id, ie.revision), (), None, "")])
226
texts.insert_record_stream([ChunkedContentFactory((file_id, ie.revision), (), None, [])])
223
227
invdelta.append((None, path, file_id, ie))
225
229
# See if this has changed at all
227
base_sha = shagitmap.lookup_tree(file_id, base_inv.revision_id)
231
base_sha = base_inv_shamap.lookup_tree(file_id)
232
except (KeyError, NotImplementedError):
231
235
if base_sha == hexsha:
233
237
return [], {}, []
234
238
if base_ie.kind != "directory":
235
239
ie.revision = revision_id
236
texts.insert_record_stream([FulltextContentFactory((ie.file_id, ie.revision), (), None, "")])
240
texts.insert_record_stream([ChunkedContentFactory((ie.file_id, ie.revision), (), None, [])])
237
241
invdelta.append((base_inv.id2path(ie.file_id), path, ie.file_id, ie))
238
242
if base_ie is not None and base_ie.kind == "directory":
239
243
base_children = base_ie.children
251
255
if stat.S_ISDIR(mode):
252
256
subinvdelta, grandchildmodes, subshamap = import_git_tree(
253
257
texts, mapping, child_path, child_hexsha, base_inv,
254
base_children.get(basename), file_id, revision_id,
255
parent_invs, shagitmap, lookup_object,
258
base_inv_shamap, base_children.get(basename), file_id,
259
revision_id, parent_invs, lookup_object,
256
260
allow_submodules=allow_submodules)
257
invdelta.extend(subinvdelta)
258
child_modes.update(grandchildmodes)
259
shamap.extend(subshamap)
260
261
elif S_ISGITLINK(mode): # submodule
261
262
if not allow_submodules:
262
263
raise SubmodulesRequireSubtrees()
263
264
subinvdelta, grandchildmodes, subshamap = import_git_submodule(
264
265
texts, mapping, child_path, child_hexsha, base_inv, base_children.get(basename),
265
file_id, revision_id, parent_invs, shagitmap, lookup_object)
266
invdelta.extend(subinvdelta)
267
child_modes.update(grandchildmodes)
268
shamap.extend(subshamap)
266
file_id, revision_id, parent_invs, lookup_object)
270
268
subinvdelta, subshamap = import_git_blob(texts, mapping,
271
child_path, child_hexsha, base_inv, base_children.get(basename), file_id,
272
revision_id, parent_invs, shagitmap, lookup_object,
269
child_path, child_hexsha, base_inv, base_inv_shamap,
270
base_children.get(basename), file_id,
271
revision_id, parent_invs, lookup_object,
273
272
mode_is_executable(mode), stat.S_ISLNK(mode))
274
invdelta.extend(subinvdelta)
275
shamap.extend(subshamap)
274
child_modes.update(grandchildmodes)
275
invdelta.extend(subinvdelta)
276
shamap.update(subshamap)
276
277
if mode not in (stat.S_IFDIR, DEFAULT_FILE_MODE,
277
278
stat.S_IFLNK, DEFAULT_FILE_MODE|0111):
278
279
child_modes[child_path] = mode
280
281
if base_ie is not None and base_ie.kind == "directory":
281
282
invdelta.extend(remove_disappeared_children(base_inv.id2path(file_id),
282
283
base_children, existing_children))
283
shamap.append((hexsha, "tree", (file_id, revision_id)))
284
shamap[file_id] = hexsha
284
285
return invdelta, child_modes, shamap
287
def approx_inv_size(inv, per_entry_size=1024):
    """Return a very rough size estimate for an inventory.

    The estimate is just the number of entries multiplied by a fixed
    per-entry cost; no entry is actually inspected.

    :param inv: Any object supporting ``len()``; its length is taken as the
        number of inventory entries.
    :param per_entry_size: Cost assumed for a single entry. Defaults to
        1024 (the "1k per inventory entry" rule of thumb), so existing
        one-argument callers get exactly the same result as before.
    :return: ``len(inv) * per_entry_size``
    """
    # Deliberately crude: only the entry count matters.
    return len(inv) * per_entry_size
292
288
def import_git_commit(repo, mapping, head, lookup_object,
293
289
target_git_object_retriever, parent_invs_cache):
294
290
o = lookup_object(head)
296
292
# We have to do this here, since we have to walk the tree and
297
293
# we need to make sure to import the blobs / trees with the right
298
294
# path; this may involve adding them more than once.
300
for parent_id in rev.parent_ids:
302
parent_invs.append(parent_invs_cache[parent_id])
304
parent_inv = repo.get_inventory(parent_id)
305
parent_invs.append(parent_inv)
306
parent_invs_cache[parent_id] = parent_inv
295
parent_invs = parent_invs_cache.get_inventories(rev.parent_ids)
307
296
if parent_invs == []:
308
297
base_inv = Inventory(root_id=None)
299
base_inv_shamap = None # Should never be accessed
311
301
base_inv = parent_invs[0]
312
302
base_ie = base_inv.root
303
base_inv_shamap = target_git_object_retriever._idmap.get_inventory_sha_map(base_inv.revision_id)
313
304
inv_delta, unusual_modes, shamap = import_git_tree(repo.texts,
314
mapping, "", o.tree, base_inv, base_ie, None, rev.revision_id,
315
parent_invs, target_git_object_retriever._idmap, lookup_object,
305
mapping, "", o.tree, base_inv, base_inv_shamap, base_ie, None,
306
rev.revision_id, parent_invs, lookup_object,
316
307
allow_submodules=getattr(repo._format, "supports_tree_reference", False))
317
target_git_object_retriever._idmap.add_entries(shamap)
309
for (oldpath, newpath, fileid, new_ie) in inv_delta:
311
entries.append((fileid, None, None, None))
313
if new_ie.kind in ("file", "symlink"):
314
entries.append((fileid, "blob", shamap[fileid], new_ie.revision))
315
elif new_ie.kind == "directory":
316
entries.append((fileid, "tree", shamap[fileid], rev.revision_id))
319
target_git_object_retriever._idmap.add_entries(rev.revision_id,
320
rev.parent_ids, head, o.tree, entries)
318
321
if unusual_modes != {}:
319
322
for path, mode in unusual_modes.iteritems():
320
323
warn_unusual_mode(rev.foreign_revid, path, mode)
327
330
rev.inventory_sha1, inv = repo.add_inventory_by_delta(basis_id,
328
331
inv_delta, rev.revision_id, rev.parent_ids,
330
parent_invs_cache[rev.revision_id] = inv
333
parent_invs_cache.add(rev.revision_id, inv)
331
334
repo.add_revision(rev.revision_id, rev)
332
335
if "verify" in debug.debug_flags:
333
336
new_unusual_modes = mapping.export_unusual_file_modes(rev)
334
337
if new_unusual_modes != unusual_modes:
335
338
raise AssertionError("unusual modes don't match: %r != %r" % (unusual_modes, new_unusual_modes))
336
339
objs = inventory_to_tree_and_blobs(inv, repo.texts, mapping, unusual_modes)
337
for sha1, newobj, path in objs:
340
for newsha1, newobj, path in objs:
338
341
assert path is not None
339
oldobj = tree_lookup_path(lookup_object, o.tree, path)
341
raise AssertionError("%r != %r in %s" % (oldobj, newobj, path))
344
def import_git_objects(repo, mapping, object_iter, target_git_object_retriever,
345
(oldmode, oldsha1) = tree_lookup_path(lookup_object, o.tree, path)
346
if oldsha1 != newsha1:
347
raise AssertionError("%r != %r in %s" % (oldsha1, newsha1, path))
350
def import_git_objects(repo, mapping, object_iter,
351
target_git_object_retriever, heads, pb=None, limit=None):
346
352
"""Import a set of git objects into a bzr repository.
348
354
:param repo: Target Bazaar repository
349
355
:param mapping: Mapping to use
350
356
:param object_iter: Iterator over Git objects.
357
:return: Tuple with pack hints and last imported revision id
352
target_git_object_retriever._idmap.start_write_group() # FIXME: try/finally
353
359
def lookup_object(sha):
355
361
return object_iter[sha]
379
384
squash_revision(repo, rev)
380
385
graph.append((o.id, o.parents))
381
target_git_object_retriever._idmap.add_entry(o.id, "commit",
382
(rev.revision_id, o.tree))
383
386
heads.extend([p for p in o.parents if p not in checked])
384
387
elif isinstance(o, Tag):
385
heads.append(o.object[1])
388
if o.object[1] not in checked:
389
heads.append(o.object[1])
387
391
trace.warning("Unable to import head object %r" % o)
388
392
checked.add(o.id)
424
432
"""See InterRepository.copy_content."""
425
433
self.fetch(revision_id, pb, find_ghosts=False)
427
def fetch(self, revision_id=None, pb=None, find_ghosts=False, mapping=None,
429
self.fetch_refs(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts,
430
mapping=mapping, fetch_spec=fetch_spec)
435
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
          mapping=None, fetch_spec=None):
    """Delegate to ``self.fetch_refs`` with the same arguments.

    Every parameter is forwarded unchanged, by keyword, to
    ``self.fetch_refs``. Whatever ``fetch_refs`` returns is discarded, so
    this method always returns None.

    :param revision_id: Passed through as ``revision_id``.
    :param pb: Passed through as ``pb``.
    :param find_ghosts: Passed through as ``find_ghosts``.
    :param mapping: Passed through as ``mapping``.
    :param fetch_spec: Passed through as ``fetch_spec``.
    """
    forwarded = {
        "revision_id": revision_id,
        "pb": pb,
        "find_ghosts": find_ghosts,
        "mapping": mapping,
        "fetch_spec": fetch_spec,
    }
    self.fetch_refs(**forwarded)
433
441
class InterGitNonGitRepository(InterGitRepository):
453
461
ret = [mapping.revision_id_bzr_to_foreign(revid)[0] for revid in interesting_heads if revid not in (None, NULL_REVISION)]
454
462
return [rev for rev in ret if not self.target.has_revision(mapping.revision_id_foreign_to_bzr(rev))]
455
pack_hint = self.fetch_objects(determine_wants, mapping, pb)
463
(pack_hint, _) = self.fetch_objects(determine_wants, mapping, pb)
456
464
if pack_hint is not None and self.target._format.pack_compresses:
457
465
self.target.pack(hint=pack_hint)
458
466
if interesting_heads is not None:
486
494
map(all_parents.update, parent_map.itervalues())
487
495
return set(all_revs) - all_parents
489
def fetch_objects(self, determine_wants, mapping, pb=None):
497
def fetch_objects(self, determine_wants, mapping, pb=None, limit=None):
490
498
def progress(text):
491
499
report_git_progress(pb, text)
492
500
store = BazaarObjectStore(self.target, mapping)
510
518
record_determine_wants, graph_walker,
511
519
store.get_raw, progress)
512
520
return import_git_objects(self.target, mapping,
513
objects_iter, store, recorded_wants, pb)
521
objects_iter, store, recorded_wants, pb, limit)
516
524
create_pb.finished()