/brz/remove-bazaar : revision 0.200.43

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to git_repository.py

Committer: David Allouche
Date: 2007-12-29 21:35:28 UTC
mto: (0.312.1 master) (6883.23.1 bundle-git)
mto: This revision was merged to the branch mainline in revision 6960.
Revision ID: ddaa@canonical.com-20071229213528-8iyo5ne7xx6xsgzh

Ultra-experimental support for "bzr pull". No test. No sanity.

files modified:
git_branch.py

git_repository.py

ids.py

model.py

Show diffs side-by-side

added added

removed removed

git_repository.py

from bzrlib import (

deprecated_graph,

errors,

inventory,

osutils,

repository,

revision,

revisiontree,

gitdirectory = gitdir.transport.local_abspath('.')

self._git = model.GitModel(gitdirectory)

self._revision_cache = {}

self._blob_cache = {}

self._entry_revision_cache = {}

self._inventory_cache = {}

def _ancestor_revisions(self, revision_ids):

if revision_ids is not None:

git_revisions = None

for lines in self._git.ancestor_lines(git_revisions):

yield self._parse_rev(lines)

# print "fetched ancestors:", git_revisions

def is_shared(self):

return True

else:

param = None

git_graph = self._git.get_revision_graph(param)

# print "fetched revision graph:", param

for node, parents in git_graph.iteritems():

bzr_node = ids.convert_revision_id_git_to_bzr(node)

bzr_parents = [ids.convert_revision_id_git_to_bzr(n)

revision_ids = [ids.convert_revision_id_bzr_to_git(r)

for r in revision_ids]

git_graph = self._git.get_revision_graph(revision_ids)

# print "fetched revision graph (ghosts):", revision_ids

for node, parents in git_graph.iteritems():

bzr_node = ids.convert_revision_id_git_to_bzr(node)

bzr_parents = [ids.convert_revision_id_git_to_bzr(n)

graph.add_node(bzr_node, bzr_parents)

return graph

def get_ancestry(self, revision_id):
    """Return the ancestry of a revision as a list of bzr revision ids.

    :param revision_id: bzr revision id whose ancestry is requested.
    :return: A list starting with None, followed by every id returned by
        ``self._git.get_ancestry`` converted to a bzr revision id.
    """
    param = [ids.convert_revision_id_bzr_to_git(revision_id)]
    git_ancestry = self._git.get_ancestry(param)
    # print "fetched ancestry:", param
    return [None] + [
        ids.convert_revision_id_git_to_bzr(git_id)
        for git_id in git_ancestry]

104

105

def get_signature_text(self, revision_id):
    """Always raise NoSuchRevision: no signature text is ever returned.

    :param revision_id: Revision id whose signature was requested.
    :raises errors.NoSuchRevision: unconditionally.
    """
    raise errors.NoSuchRevision(self, revision_id)

107

108

def get_inventory_xml(self, revision_id):
    """See Repository.get_inventory_xml().

    :param revision_id: Revision id of the inventory to serialize.
    :return: The inventory from ``self.get_inventory`` written to a string
        by the bzrlib xml5 serializer.
    """
    return bzrlib.xml5.serializer_v5.write_inventory_to_string(
        self.get_inventory(revision_id))

112

113

def get_inventory_sha1(self, revision_id):
    """Get the sha1 for the XML representation of an inventory.

    :param revision_id: Revision id of the inventory for which to return
        the SHA1.
    :return: The result of ``osutils.sha_string`` applied to the XML
        produced by ``self.get_inventory_xml`` (not the XML itself).
    """
    return osutils.sha_string(self.get_inventory_xml(revision_id))

122

123

def get_revision_xml(self, revision_id):
    """Return the XML representation of a revision.

    :param revision_id: Revision for which to return the XML.
    :return: The revision from ``self.get_revision`` written to a string
        by the bzrlib xml5 serializer.
    """
    return bzrlib.xml5.serializer_v5.write_revision_to_string(
        self.get_revision(revision_id))

131

132

def get_revision(self, revision_id):
    """Return the parsed revision for ``revision_id``, caching the result.

    A stale, uncached ``return self._parse_rev(raw)`` used to sit in front
    of the caching code, which left ``self._revision_cache`` permanently
    empty. Only the caching path is kept.

    :param revision_id: bzr revision id to look up.
    :return: The cached object if present; otherwise the result of
        ``self._parse_rev`` on the ``rev_list`` output for the matching
        git commit, which is stored in ``self._revision_cache`` first.
    """
    if revision_id in self._revision_cache:
        return self._revision_cache[revision_id]
    git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
    raw = self._git.rev_list([git_commit_id], max_count=1, header=True)
    # print "fetched revision:", git_commit_id
    parsed = self._parse_rev(raw)
    self._revision_cache[revision_id] = parsed
    return parsed

141

142

def has_revision(self, revision_id):

143

try:

176

221

sign = {'+': +1, '-': -1}[tz[0]]

177

222

hours = int(tz[1:3])

178

223

minutes = int(tz[3:])

179

return float(sign * 60 * (60 * hours + minutes))

224

return sign * 60 * (60 * hours + minutes)

180

225

181

226

def revision_trees(self, revids):

182

227

for revid in revids:

185

230

def revision_tree(self, revision_id):
    """Return a GitRevisionTree for ``revision_id`` backed by this repository.

    :param revision_id: Revision id passed through to GitRevisionTree.
    """
    return GitRevisionTree(self, revision_id)

187

232

233

def _get_blob(self, git_id):

234

try:

235

return self._blob_cache[git_id]

236

except KeyError:

237

blob = self._git.cat_file('blob', git_id)

238

# print "fetched blob:", git_id

239

self._blob_cache[git_id] = blob

240

return blob

241

188

242

def get_inventory(self, revision_id):
    """Build the bzr inventory for ``revision_id`` from the git tree.

    A stale early ``return self._parse_inventory(...)`` used to sit before
    the second pass and made it unreachable. Only the two-pass version is
    kept.

    :param revision_id: bzr revision id. None is treated as
        ``revision.NULL_REVISION``.
    :return: For the null revision, an empty inventory with
        ``root_id=None``. Otherwise the inventory from
        ``self._parse_inventory``, with ``git_file_data`` filled in and
        every entry passed through ``self._set_entry_text_info`` and
        ``self._set_entry_revision``.
    """
    if revision_id is None:
        revision_id = revision.NULL_REVISION
    if revision_id == revision.NULL_REVISION:
        return inventory.Inventory(
            revision_id=revision_id, root_id=None)

    # First pass at building the inventory. We need this one to get the
    # git ids, so we do not have to cache the entire tree text. Ideally,
    # this should be all we need to do.
    git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
    git_inventory = self._git.get_inventory(git_commit)
    # print "fetched inventory:", git_commit
    inv = self._parse_inventory(revision_id, git_inventory)

    # Second pass at building the inventory. There we retrieve additional
    # data that bzrlib requires: text sizes, sha1s, symlink targets and
    # revisions that introduced inventory entries
    inv.git_file_data = {}
    # Sorted once and reused: neither loop adds or removes git_ids keys.
    file_ids = sorted(inv.git_ids)
    # Text info is set on every entry before any entry revision is
    # computed, so the two loops are kept separate.
    for file_id in file_ids:
        git_id = inv.git_ids[file_id]
        entry = inv[file_id]
        self._set_entry_text_info(inv, entry, git_id)
    for file_id in file_ids:
        git_id = inv.git_ids[file_id]
        entry = inv[file_id]
        path = inv.id2path(file_id)
        self._set_entry_revision(entry, revision_id, path, git_id)
    return inv

197

271

198

272

@classmethod

199

273

def _parse_inventory(klass, revid, git_inv):

200

274

# For now, git inventories do not have root ids. It is not clear that we

201

275

# can reliably support root ids. -- David Allouche 2007-12-28

202

276

inv = inventory.Inventory(revision_id=revid)

277

inv.git_ids = {}

203

278

for perms, git_kind, git_id, path in git_inv:

204

279

text_sha1 = None

205

280

executable = False

206

281

if git_kind == 'blob':

207

text_sha1 = git_id

208

282

if perms[1] == '0':

209

283

kind = 'file'

210

284

executable = bool(int(perms[-3:], 8) & 0111)

221

295

# XXX: Maybe the file id should be prefixed by file kind, so when

222

296

# the kind of path changes, the id changes too.

223

297

# -- David Allouche 2007-12-28.

224

entry = inv.add_path(path, kind, file_id=path.encode('utf-8'))

225

entry.text_sha1 = text_sha1

298

file_id = escape_file_id(path.encode('utf-8'))

299

entry = inv.add_path(path, kind, file_id=file_id)

226

300

entry.executable = executable

227

return inv

228

301

inv.git_ids[file_id] = git_id

302

inv.root.revision = revid

303

return inv

304

305

def _set_entry_text_info(self, inv, entry, git_id):

306

if entry.kind == 'directory':

307

return

308

lines = self._get_blob(git_id)

309

entry.text_size = sum(len(line) for line in lines)

310

entry.text_sha1 = osutils.sha_strings(lines)

311

if entry.kind == 'symlink':

312

entry.symlink_target = ''.join(lines)

313

inv.git_file_data[entry.file_id] = lines

314

315

def _get_file_revision(self, revision_id, path):
    """Ask git which revision to attribute ``path`` to, as of ``revision_id``.

    Runs ``rev_list`` limited to ``path`` with ``max_count=1`` and
    ``topo_order=True``, starting from the git commit for ``revision_id``.

    :param revision_id: bzr revision id to start from.
    :param path: Path used to limit the rev-list.
    :return: The single returned line, minus its last character, converted
        to a bzr revision id.
    :raises ValueError: if rev_list does not yield exactly one line (the
        single-element unpacking below fails).
    """
    lines = self._git.rev_list(
        [ids.convert_revision_id_bzr_to_git(revision_id)],
        max_count=1, topo_order=True, paths=[path])
    [line] = lines
    # line[:-1] drops the final character of the line; presumably the
    # trailing newline -- TODO confirm against GitModel.rev_list.
    result = ids.convert_revision_id_git_to_bzr(line[:-1])
    # print "fetched file revision", line[:-1], path
    return result

323

324

# The various versions of _set_entry_revision can be tested by pulling from

325

# the git repo of git itself. First pull up to r700, then r702 to

326

# reproduce the RevisionNotPresent errors.

327

328

def _set_entry_revision_unoptimized(self, entry, revid, path, git_id):

329

# This is unusably slow and will lead to recording a few unnecessary

330

# duplicated file texts. But it seems to be consistent enough to let

331

# pulls resume without causing RevisionNotPresent errors.

332

entry.revision = self._get_file_revision(revid, path)

333

334

def _set_entry_revision_optimized1(self, entry, revid, path, git_id):

335

# This is much faster, produces fewer unique file texts, but will

336

# cause RevisionNotPresent errors when resuming pull.

337

338

# Oops, this does not account for changes in executable bit. That is

339

# probably why it produces fewer unique texts.

340

cached = self._entry_revision_cache.get((revid, path, git_id))

341

if cached is not None:

342

entry.revision = cached

343

return

344

revision = self.get_revision(revid)

345

for parent_id in revision.parent_ids:

346

entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))

347

if entry_rev is not None:

348

break

349

else:

350

entry_rev = self._get_file_revision(revid, path)

351

self._entry_revision_cache[(revid, path, git_id)] = entry_rev

352

entry.revision = entry_rev

353

354

def _set_entry_revision_optimized2(self, entry, revid, path, git_id):

355

# This is slower than the previous one, and does not appear to have a

356

# subtantially different effect. Same number of unique texts, same

357

# RevisionNotPresent error.

358

359

# Oops, this does not account for changes in executable bit. That is

360

# probably why it produces fewer unique texts.

361

cached = self._entry_revision_cache.get((revid, path, git_id))

362

if cached is not None:

363

entry.revision = cached

364

return

365

revision = self.get_revision(revid)

366

parent_hits = []

367

for parent_id in revision.parent_ids:

368

entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))

369

if entry_rev is not None:

370

parent_hits.append(entry_rev)

371

if len(parent_hits) == len(revision.parent_ids) and len(set(parent_hits)) == 1:

372

entry_rev = parent_hits[0]

373

else:

374

entry_rev = self._get_file_revision(revid, path)

375

self._entry_revision_cache[(revid, path, git_id)] = entry_rev

376

entry.revision = entry_rev

377

378

_original_get_inventory = get_inventory

379

def _get_inventory_caching(self, revid):

380

if revid in self._inventory_cache:

381

return self._inventory_cache[revid]

382

inv = self._original_get_inventory(revid)

383

self._inventory_cache[revid] = inv

384

return inv

385

386

def _set_entry_revision_optimized3(self, entry, revid, path, git_id):

387

# Depends on _get_inventory_caching.

388

389

# Set the revision of directories to the current revision. It's not

390

# accurate, but we cannot compare directory contents from here.

391

if entry.kind == 'directory':

392

entry.revision = revid

393

return

394

# Build ancestral inventories by walking parents depth first. Ideally

395

# this should be done in an inter-repository, where already imported

396

# data can be used as reference.

397

current_revid = revid

398

revision = self.get_revision(revid)

399

pending_revids = list(reversed(revision.parent_ids))

400

while pending_revids:

401

revid = pending_revids.pop()

402

if revid in self._inventory_cache:

403

continue

404

# Not in cache, ensure parents are in cache first.

405

pending_revids.append(revid)

406

revision = self.get_revision(revid)

407

for parent_id in reversed(revision.parent_ids):

408

if parent_id not in self._inventory_cache:

409

pending_revids.extend(reversed(revision.parent_ids))

410

break

411

else:

412

# All parents are in cache, we can now build this inventory.

413

revid = pending_revids.pop()

414

self.get_inventory(revid) # populate cache

415

# We now have all ancestral inventories in the cache. Get entries by

416

# the same file_id in parent inventories, and use the revision of the

417

# first one that has the same text_sha1 and executable bit.

418

revision = self.get_revision(current_revid)

419

for revid in revision.parent_ids:

420

inventory = self.get_inventory(revid)

421

if entry.file_id in inventory:

422

parent_entry = inventory[entry.file_id]

423

if (parent_entry.text_sha1 == entry.text_sha1

424

and parent_entry.executable == entry.executable):

425

entry.revision = parent_entry.revision

426

return

427

# If we get here, that means we found no matching parent entry, use

428

# the current revision.

429

entry.revision = current_revid

430

431

def _set_entry_revision_optimized4(self, entry, revid, path, git_id):

432

# Same as optimized1, but uses the executable bit in the cache index.

433

# That appears to have the same behaviour as the unoptimized version.

434

cached = self._entry_revision_cache.get(

435

(revid, path, git_id, entry.executable))

436

if cached is not None:

437

entry.revision = cached

438

return

439

revision = self.get_revision(revid)

440

for parent_id in revision.parent_ids:

441

entry_rev = self._entry_revision_cache.get(

442

(parent_id, path, git_id, entry.executable))

443

if entry_rev is not None:

444

break

445

else:

446

entry_rev = self._get_file_revision(revid, path)

447

self._entry_revision_cache[

448

(revid, path, git_id, entry.executable)] = entry_rev

449

entry.revision = entry_rev

450

451

def _set_entry_revision_optimized5(self, entry, revid, path, git_id):

452

# Same as optimized4, but makes get_inventory non-reentrant, and uses

453

# a more structured cache.

454

455

# cache[revision][path, git_id, executable] -> revision

456

457

# If a revision is in the cache, we assume it contains entries for the

458

# whole inventory. So if all parent revisions are in the cache, but no

459

# parent entry is present, then the entry revision is the current

460

# revision. That amortizes the number of git calls for large pulls to

461

# zero.

462

cached = self._entry_revision_cache.get(revid, {}).get(

463

(path, git_id, entry.executable))

464

if cached is not None:

465

entry.revision = cached

466

return

467

revision = self.get_revision(revid)

468

all_parents_in_cache = True

469

for parent_id in revision.parent_ids:

470

if parent_id not in self._entry_revision_cache:

471

all_parents_in_cache = False

472

continue

473

entry_rev = self._entry_revision_cache[parent_id].get(

474

(path, git_id, entry.executable))

475

if entry_rev is not None:

476

break

477

else:

478

if all_parents_in_cache:

479

entry_rev = revid

480

else:

481

entry_rev = self._get_file_revision(revid, path)

482

self._entry_revision_cache.setdefault(

483

revid, {})[(path, git_id, entry.executable)] = entry_rev

484

entry.revision = entry_rev

485

486

# Strategy that get_inventory calls (as self._set_entry_revision) to fill in
# the revision of each inventory entry.
_set_entry_revision = _set_entry_revision_optimized5

487

# Disabled: rebinding get_inventory to the caching wrapper, which
# _set_entry_revision_optimized3 states it depends on.
#get_inventory = _get_inventory_caching

488

489

490

def escape_file_id(file_id):
    """Return ``file_id`` with underscores and spaces escaped.

    Every ``'_'`` becomes ``'__'`` and every ``' '`` becomes ``'_s'``.
    Underscores are doubled first, so an escaped space cannot be confused
    with a literal underscore followed by ``'s'``.

    :param file_id: String to escape.
    :return: The escaped string, containing no spaces.
    """
    return file_id.replace('_', '__').replace(' ', '_s')

229

492

230

493

class GitRevisionTree(revisiontree.RevisionTree):

231

494

237

500

self._revision_id = revision_id

238

501

239

502

def get_file_lines(self, file_id):
    """Return the content lines recorded for ``file_id`` in the inventory.

    A stale ``text_sha1`` lookup and a ``cat_file`` fallback placed after
    the final return have been removed; the fallback could never run.

    :param file_id: File id of the inventory entry.
    :return: An empty list for directories; otherwise the value stored
        under ``file_id`` in ``self._inventory.git_file_data``.
    """
    entry = self._inventory[file_id]
    if entry.kind == 'directory':
        return []
    return self._inventory.git_file_data[file_id]

Older »