/brz/remove-bazaar : revision 1185.82.1

1

#!/usr/bin/env python

2

"""\

3

Read in a changeset output, and process it into a Changeset object.

4

"""

5

6

import os

7

import pprint

8

from cStringIO import StringIO

9

10

from bzrlib.tree import Tree

11

from bzrlib.trace import mutter, warning

12

from bzrlib.errors import BzrError

13

from bzrlib.xml5 import serializer_v5

14

from bzrlib.osutils import sha_file, sha_string

15

from bzrlib.revision import Revision

16

from bzrlib.inventory import (Inventory, InventoryEntry,

17

InventoryDirectory, InventoryFile,

18

InventoryLink)

19

20

from common import decode, get_header, header_str

21

22

class BadChangeset(Exception):
    """Base class of the errors this module defines for unparsable changesets."""


class MalformedHeader(BadChangeset):
    """The header section of the changeset could not be parsed."""


class MalformedPatches(BadChangeset):
    """The patch section of the changeset could not be parsed."""


class MalformedFooter(BadChangeset):
    """Error class reserved for problems in the changeset footer."""

26

27

def _unescape(name):
    """Strip the repr()-style quoting from a filename.

    The filename is written out as repr(filename), which means that it is
    surrounded by quotes which may be single or double (single is preferred
    unless there is a single quote in the filename), and that quote
    characters inside the name are escaped with a backslash.

    TODO: There has to be some pythonic way of undo-ing the
          representation of a string rather than using eval.

    :param name: The quoted filename, including both quote characters.
    :return: The name without the surrounding quotes and with
        backslash-escaped quote characters restored.
    :raises BadChangeset: If name is too short to hold both quotes, or
        its first and last characters differ.
    """
    # Both an opening and a matching closing delimiter are required.
    if len(name) < 2 or name[-1] != name[0]:
        raise BadChangeset('Could not properly parse the'
                           ' filename: %r' % name)
    # TODO: We need to handle escaped hexadecimals too.
    # The backslash has to be doubled in the search pattern: '\"' is the
    # very same string as '"', which would make the replace a no-op.
    return name[1:-1].replace('\\"', '"').replace("\\'", "'")

44

45

class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Every attribute other than revision_id starts out empty and is
    assigned later by whoever parses the changeset header.
    """

    def __init__(self, revision_id):
        """Create an empty record for revision_id.

        :param revision_id: The id of the revision being described.
        """
        self.revision_id = revision_id
        self.sha1 = None
        self.committer = None
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None

        # Entries of the form '<revision_id> <sha1>', see as_revision()
        self.parents = None
        # parent revision_id => sha1, filled in by as_revision()
        self.parent_sha1s = {}
        self.message = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a Revision object from the fields collected so far.

        As a side effect, parent_sha1s is updated with the sha1 given for
        each entry of parents.

        :return: A new Revision.
        """
        message = self.message
        # A multi-line message arrives as a list of lines. A message given
        # inline is a single string, and joining that would put a newline
        # between every character.
        if isinstance(message, (list, tuple)):
            message = '\n'.join(message)

        rev = Revision(revision_id=self.revision_id,
                       committer=self.committer,
                       timestamp=float(self.timestamp),
                       timezone=int(self.timezone),
                       inventory_sha1=self.inventory_sha1,
                       message=message)

        if self.parents:
            for parent in self.parents:
                # Each entry holds the parent id followed by its sha1
                revision_id, sha1 = parent.split()
                rev.parent_ids.append(revision_id)
                self.parent_sha1s[revision_id] = sha1

        return rev

79

80

class ChangesetInfo(object):
    """Meta information of a changeset.

    This is the data that allows the revision or inventory XML to be
    recreated.
    """

    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        # RevisionInfo objects, in the order they were read
        self.revisions = []

        self.actions = []

        # Everything below is filled in by complete_info() and the other
        # post-read functions.

        # The real Revision objects built from self.revisions
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """Fill in whatever can be derived from the data already present.

        Revisions lacking a timestamp, message or committer inherit the
        changeset-wide value, real_revisions is rebuilt from revisions, and
        a missing base is taken from the first parent of the first revision.
        """
        from common import unpack_highres_date
        # Start with the changeset-wide defaults.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        self.real_revisions = []
        for rev_info in self.revisions:
            if rev_info.timestamp is None:
                if rev_info.date is None:
                    rev_info.timestamp = self.timestamp
                    rev_info.timezone = self.timezone
                else:
                    rev_info.timestamp, rev_info.timezone = \
                            unpack_highres_date(rev_info.date)
            if rev_info.message is None and self.message:
                rev_info.message = self.message
            if rev_info.committer is None and self.committer:
                rev_info.committer = self.committer
            self.real_revisions.append(rev_info.as_revision())

        if self.base is not None:
            return

        # Without an explicit base, the real base is the first parent of
        # the first revision listed.
        first = self.real_revisions[0]
        if first.parent_ids:
            self.base = first.parent_ids[0]
            self.base_sha1 = self.revisions[0].parent_sha1s[self.base]
        else:
            # No base was listed and the lowest revision has no parent, so
            # this is probably against the empty tree and base truly is None.
            self.base = None
            self.base_sha1 = None

    def _get_target(self):
        """Return the id of the last revision, or None if there is none."""
        for candidates in (self.real_revisions, self.revisions):
            if candidates:
                return candidates[-1].revision_id
        return None

    target = property(_get_target, doc='The target revision id')

157

158

class ChangesetReader(object):
    """This class reads in a changeset from a file, and returns
    a Changeset object, which can then be applied against a tree.

    The input is consumed line by line with one line of lookahead kept in
    self._next_line, so that the parsing helpers can decide whether the
    following line still belongs to the section they are reading.
    """

    def __init__(self, from_file):
        """Read in the changeset from the file.

        :param from_file: A file-like object (must have iterator support).
            It has to be its own iterator, because iteration is resumed
            on it from several places.
        """
        object.__init__(self)
        self.from_file = from_file
        self._next_line = None

        self.info = ChangesetInfo()
        # We put the actual inventory ids in the footer, so that the patch
        # is easier to read for humans.
        # Unfortunately, that means we need to read everything before we
        # can create a proper changeset.
        self._read()
        self._validate()

    def _read(self):
        """Read the three sections of the changeset, in order."""
        self._read_header()
        self._read_patches()
        self._read_footer()

    def _validate(self):
        """Make sure that the information read in makes sense
        and passes appropriate checksums.
        """
        # Fill in all the missing blanks for the revisions
        # and generate the real_revisions list.
        self.info.complete_info()
        self._validate_revisions()

    def _validate_revisions(self):
        """Make sure all revision entries match their checksum.

        :raises BzrError: If a serialized revision does not hash to the
            sha1 supplied for it, or a revision id is listed twice.
        """
        # This is a mapping from each revision id to its sha hash
        rev_to_sha1 = {}

        for rev, rev_info in zip(self.info.real_revisions,
                                 self.info.revisions):
            assert rev.revision_id == rev_info.revision_id
            sio = StringIO()
            serializer_v5.write_revision(rev, sio)
            sio.seek(0)
            sha1 = sha_file(sio)
            if sha1 != rev_info.sha1:
                raise BzrError('Revision checksum mismatch.'
                    ' For revision_id {%s} supplied sha1 (%s) != measured (%s)'
                    % (rev.revision_id, rev_info.sha1, sha1))
            if rev.revision_id in rev_to_sha1:
                raise BzrError('Revision {%s} given twice in the list'
                        % (rev.revision_id))
            rev_to_sha1[rev.revision_id] = sha1

        # Now that we've checked all the sha1 sums, we can make sure that
        # at least for the small list we have, all of the references are
        # valid.
        ## TODO: Bring this back: for every parent id of every real
        ##       revision that is also a key of rev_to_sha1, compare the
        ##       sha1 the parent was referenced with against
        ##       rev_to_sha1[p_id] and raise BzrError('Parent revision
        ##       checksum mismatch...') when they differ.

    def _validate_references_from_branch(self, branch):
        """Now that we have a branch which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        :param branch: Object queried through has_revision(),
            get_revision_sha1() and get_inventory_sha1().
        :raises BzrError: If a sha1 from the changeset disagrees with the
            one the branch reports, or with another one in the changeset.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, revision_id, sha1):
            """Record sha1 for revision_id in d, refusing conflicts."""
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (revision_id,
                                sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.info.revisions:
            checked[rev_info.revision_id] = True
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        for (_rev, rev_info) in zip(self.info.real_revisions,
                                    self.info.revisions):
            add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
            for p_id, sha1 in rev_info.parent_sha1s.items():
                add_sha(rev_to_sha, p_id, sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.items():
            if branch.has_revision(revision_id):
                local_sha1 = branch.get_revision_sha1(revision_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s'
                            % (revision_id, local_sha1, sha1))
                else:
                    count += 1
            elif revision_id not in checked:
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if branch.has_revision(inv_id):
                # TODO: Currently branch.get_inventory_sha1() just returns the value
                # that is stored in the revision text. Which is *really* bogus, because
                # that means we aren't validating the actual text, just that we wrote
                # and read the string. But for now, what the hell.
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s'
                            % (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)

    def _validate_inventory(self, inv):
        """At this point we should have generated the ChangesetTree,
        so build up an inventory, and make sure the hashes match.

        :param inv: The inventory built for the target revision.
        :raises BzrError: If the serialized inventory does not hash to the
            inventory_sha1 of the last revision. The serialized text is
            written to the file ',,bogus-inv' before raising.
        """
        assert inv is not None

        # Now we should have a complete inventory entry.
        s = serializer_v5.write_inventory_to_string(inv)
        sha1 = sha_string(s)
        # Target revision is the last entry in the real_revisions list
        rev = self.info.real_revisions[-1]
        if sha1 != rev.inventory_sha1:
            dump = open(',,bogus-inv', 'wb')
            try:
                dump.write(s)
            finally:
                # Close explicitly rather than leaking the handle
                dump.close()
            raise BzrError('Inventory sha hash mismatch.')

    def get_changeset(self, branch):
        """Return the meta information, and a Changeset tree which can
        be used to populate the local stores and working tree, respectively.

        :param branch: The branch used to validate references and to
            supply the revision tree of the changeset base.
        :return: (ChangesetInfo, ChangesetTree)
        """
        self._validate_references_from_branch(branch)
        cset_tree = ChangesetTree(branch.revision_tree(self.info.base))
        self._update_tree(cset_tree)

        inv = cset_tree.inventory
        self._validate_inventory(inv)

        return self.info, cset_tree

    def _next(self):
        """yield the next line, but secretly
        keep 1 extra line for peeking.

        The peeked line is available as self._next_line; it is None once
        the input is exhausted. Nothing is yielded when there is no line
        left, so callers never receive None.
        """
        for line in self.from_file:
            last = self._next_line
            self._next_line = line
            if last is not None:
                #mutter('yielding line: %r' % last)
                yield last
        last = self._next_line
        self._next_line = None
        # At end of input there may be no pending line at all (empty file,
        # or the final line was already handed out by another generator).
        if last is not None:
            #mutter('yielding line: %r' % last)
            yield last

    def _read_header(self):
        """Read the bzr header

        Lines are skipped until the first line of get_header() is found;
        the remaining header lines must follow immediately. After that,
        key/value entries are handled until a blank line is reached.

        :raises MalformedHeader: If the header is missing or mangled.
        """
        header = get_header()
        found = False
        for line in self._next():
            if found:
                # not all mailers will keep trailing whitespace
                if line == '#\n':
                    line = '# \n'
                if (not line.startswith('# ') or not line.endswith('\n')
                        or decode(line[2:-1]) != header[0]):
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                header.pop(0) # We read this line.
                if not header:
                    break # We found everything.
            elif (line.startswith('#') and line.endswith('\n')):
                line = decode(line[1:-1].strip())
                if line[:len(header_str)] == header_str:
                    if line == header[0]:
                        found = True
                    else:
                        raise MalformedHeader('Found what looks like'
                                ' a header, but did not match')
                    header.pop(0)
        else:
            # The input ran out before the whole header was seen
            raise MalformedHeader('Did not find an opening header')

        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if line == '\n':
                break
            self._handle_next(line)

    def _read_next_entry(self, line, indent=1):
        """Read in a key-value pair

        :param line: The raw line, including the leading '#' and the
            trailing newline.
        :param indent: Number of spaces expected after the '#'.
        :return: (key, value) with spaces in key replaced by underscores;
            value is a string, or a list of strings when the values follow
            on their own lines. (None, None) for a blank line.
        :raises MalformedHeader: If the line does not start with '#' or
            holds no key.
        """
        if not line.startswith('#'):
            raise MalformedHeader('Bzr header did not start with #')
        line = decode(line[1:-1]) # Remove the '#' and '\n'
        if line[:indent] == ' '*indent:
            line = line[indent:]
        if not line:
            return None, None # Ignore blank lines

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value = self._read_many(indent=indent+3)
        elif line[-1:] == ':':
            key = line[:-1]
            value = self._read_many(indent=indent+3)
        else:
            raise MalformedHeader('While looking for key: value pairs,'
                    ' did not find the colon %r' % (line))

        key = key.replace(' ', '_')
        #mutter('found %s: %s' % (key, value))
        return key, value

    def _handle_next(self, line):
        """Parse one top-level entry and store it on self.info.

        A 'revision' entry starts a nested revision block; any other key
        must name an attribute of self.info which is still None.

        :raises MalformedHeader: For duplicated or unknown keys.
        """
        key, value = self._read_next_entry(line, indent=1)
        mutter('_handle_next %r => %r' % (key, value))
        if key is None:
            return

        if key == 'revision':
            self._read_revision(value)
        elif hasattr(self.info, key):
            if getattr(self.info, key) is None:
                setattr(self.info, key, value)
            else:
                raise MalformedHeader('Duplicated Key: %s' % key)
        else:
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)

    def _read_many(self, indent):
        """If a line ends with no entry, that means that it should be
        followed with multiple lines of values.

        This detects the end of the list, because it will be a line that
        does not start properly indented.

        :param indent: Number of spaces expected after the '#'.
        :return: The list of decoded values, possibly empty.
        """
        values = []
        start = '#' + (' '*indent)

        if self._next_line is None or self._next_line[:len(start)] != start:
            return values

        for line in self._next():
            values.append(decode(line[len(start):-1]))
            if self._next_line is None or self._next_line[:len(start)] != start:
                break
        return values

    def _read_one_patch(self):
        """Read in one patch, return the complete patch, along with
        the next line.

        :return: action, lines, do_continue
        """
        #mutter('_read_one_patch: %r' % self._next_line)
        # Peek and see if there are no patches
        if self._next_line is None or self._next_line.startswith('#'):
            return None, [], False

        first = True
        lines = []
        for line in self._next():
            if first:
                if not line.startswith('==='):
                    raise MalformedPatches('The first line of all patches'
                        ' should be a bzr meta line "==="'
                        ': %r' % line)
                action = decode(line[4:-1])
            # NOTE(review): the lookahead is tested before the current line
            # is stored, so the last line of a patch body is not kept --
            # presumably it is a separator line; confirm against the writer.
            if self._next_line is not None and self._next_line.startswith('==='):
                return action, lines, True
            elif self._next_line is None or self._next_line.startswith('#'):
                return action, lines, False

            if first:
                first = False
            else:
                lines.append(line)

        return action, lines, False

    def _read_patches(self):
        """Read every patch, storing (action, lines) in info.actions."""
        do_continue = True
        while do_continue:
            action, lines, do_continue = self._read_one_patch()
            if action is not None:
                self.info.actions.append((action, lines))

    def _read_revision(self, revision_id):
        """Revision entries have extra information associated.

        Reads the entries nested below a 'revision' key into a new
        RevisionInfo and appends it to info.revisions.

        :raises MalformedHeader: For duplicated or unknown keys.
        """
        rev_info = RevisionInfo(revision_id)
        indent = 4
        # Nested entries start with '#' followed by `indent` spaces, the
        # same prefix rule _read_many uses.
        start = '#' + (' '*indent)
        for line in self._next():
            key, value = self._read_next_entry(line, indent=indent)
            # A blank entry gives a key of None, which hasattr() rejects
            # with a TypeError. Skip it, but still run the lookahead test
            # below so that the block ends where it should.
            if key is not None:
                if hasattr(rev_info, key):
                    if getattr(rev_info, key) is None:
                        setattr(rev_info, key, value)
                    else:
                        raise MalformedHeader('Duplicated Key: %s' % key)
                else:
                    # What do we do with a key we don't recognize
                    raise MalformedHeader('Unknown Key: %s' % key)

            if self._next_line is None or not self._next_line.startswith(start):
                break

        self.info.revisions.append(rev_info)

    def _read_footer(self):
        """Read the rest of the meta information.

        Entries are handled until the input ends or the next line does not
        start with '#'.
        """
        for line in self._next():
            self._handle_next(line)
            if self._next_line is None or not self._next_line.startswith('#'):
                break

    def _update_tree(self, cset_tree):
        """This fills out a ChangesetTree based on the information
        that was read in.

        :param cset_tree: A ChangesetTree to update with the new information.
        :raises BzrError: If an action line cannot be understood.
        """

        def get_rev_id(info, file_id, kind):
            """Find the last-changed revision of file_id and note it."""
            if info is not None:
                if not info.startswith('last-changed:'):
                    raise BzrError("Last changed revision should start with 'last-changed:'"
                        ': %r' % info)
                revision_id = decode(info[13:])
            elif file_id in cset_tree._last_changed:
                return cset_tree._last_changed[file_id]
            else:
                revision_id = self.info.target
            cset_tree.note_last_changed(file_id, revision_id)
            return revision_id

        def renamed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                        ': %r' % extra)
            old_path = info[0]
            if info[1].startswith('=> '):
                new_path = info[1][3:]
            else:
                new_path = info[1]

            # Look the id up before the rename is noted
            file_id = cset_tree.path2id(old_path)
            if len(info) > 2:
                get_rev_id(info[2], file_id, kind)
            else:
                get_rev_id(None, file_id, kind)
            cset_tree.note_rename(old_path, new_path)
            if lines:
                cset_tree.note_patch(new_path, ''.join(lines))

        def removed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                        ': %r' % extra)
            path = info[0]
            cset_tree.note_deletion(path)

        def added(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                        ': %r' % extra)
            elif len(info) > 3:
                raise BzrError('add action lines have fewer than 3 entries.'
                        ': %r' % extra)
            path = info[0]
            if not info[1].startswith('file-id:'):
                raise BzrError('The file-id should follow the path for an add'
                        ': %r' % extra)
            file_id = info[1][8:]

            cset_tree.note_id(file_id, path, kind)
            if len(info) > 2:
                get_rev_id(info[2], file_id, kind)
            else:
                get_rev_id(None, file_id, kind)
            if kind == 'directory':
                return
            cset_tree.note_patch(path, ''.join(lines))

        def modified(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 1:
                raise BzrError('modified action lines have at least'
                        'the path in them: %r' % extra)
            path = info[0]

            file_id = cset_tree.path2id(path)
            if len(info) > 1:
                get_rev_id(info[1], file_id, kind)
            else:
                get_rev_id(None, file_id, kind)
            cset_tree.note_patch(path, ''.join(lines))

        valid_actions = {
            'renamed':renamed,
            'removed':removed,
            'added':added,
            'modified':modified
        }
        for action_line, lines in self.info.actions:
            # An action line reads '<action> <kind> <extra>'
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                        ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first+1)
            if second == -1:
                raise BzrError('Bogus action line'
                        ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first+1:second]
            if kind not in ('file', 'directory'):
                raise BzrError('Bogus action line'
                        ' (invalid object kind %r): %r' % (kind, action_line))
            extra = action_line[second+1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                        ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)

628

629

def read_changeset(from_file, branch):
    """Parse a changeset from an iterable of lines (such as a file object).

    :param from_file: A file-like object to read the changeset information.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    :return: Whatever ChangesetReader.get_changeset() returns for branch.
    """
    reader = ChangesetReader(from_file)
    result = reader.get_changeset(branch)
    return result

639

640

class ChangesetTree(Tree):
    """A tree described as a base tree plus the changes of a changeset.

    Renames, additions, deletions and patches are recorded through the
    note_*() methods; the lookup methods then answer for the target tree
    by combining those records with the base tree.
    """

    def __init__(self, base_tree):
        """Start with no changes on top of base_tree.

        :param base_tree: The tree the changeset applies to.
        """
        self.base_tree = base_tree
        self._renamed = {} # Mapping from new_path => old_path
        self._renamed_r = {} # old_path => new_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self._last_changed = {} # new_id => revision_id
        self.patches = {} # new_path => patch text
        self.deleted = [] # old paths that were removed
        self.contents_by_id = True
        self._inventory = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path"""
        # _renamed is keyed by the new path and _renamed_r by the old one,
        # so those are the keys which must not be taken yet. This still
        # allows a chain such as a => b together with b => c.
        assert new_path not in self._renamed
        assert old_path not in self._renamed_r
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_last_changed(self, file_id, revision_id):
        """Record the revision in which file_id was last changed.

        :raises BzrError: If a different revision was already recorded.
        """
        if (file_id in self._last_changed
                and self._last_changed[file_id] != revision_id):
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                    ': %s != %s' % (file_id,
                                    self._last_changed[file_id],
                                    revision_id))
        self._last_changed[file_id] = revision_id

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :return: The path in the base tree, or None when the path found
            there was renamed to somewhere else.
        """
        assert new_path[:1] not in ('\\', '/')
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname, basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and is
        # requires the objects to be identical
        if dirname != '':
            # The entry follows any rename of its parent directories
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = os.path.join(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :return: The path in the target tree, or None when the path found
            there is the destination of another rename.
        """
        assert old_path[:1] not in ('\\', '/')
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        # NOTE(review): new_path is always None at this point, so this test
        # only fires if None was recorded as a rename target -- confirm
        # whether old_path was meant here.
        if new_path in self._renamed:
            return None
        dirname, basename = os.path.split(old_path)
        if dirname != '':
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = os.path.join(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed, the new one shouldn't be in
        #renamed_r
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        if hasattr(self.base_tree, 'path2id'):
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id,
        or None if the file did not exist in base.

        FIXME:  Something doesn't seem right here. It seems like this function
                should always either return None or file_id. Even if
                you are doing the by-path lookup, you are doing a
                id2path lookup, just to do the reverse path2id lookup.

        Notice that you're doing the path2id on a different tree!
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        When no patch is recorded for the file, the base tree's file (or
        None if the file is not in the base tree) is returned unchanged.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            return patch_original

        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)

    def get_kind(self, file_id):
        """Return the kind noted for file_id, else the base tree's kind."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def get_last_changed(self, file_id):
        """Return the revision noted for file_id, else the base tree's."""
        if file_id in self._last_changed:
            return self._last_changed[file_id]
        return self.base_tree.inventory[file_id].revision

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.

        :return: (size, sha1), or (None, None) if the file has no path in
            the target tree.
        """
        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            if ie.text_size is None:
                return ie.text_size, ie.text_sha1
            return int(ie.text_size), ie.text_sha1
        fileobj = self.get_file(file_id)
        content = fileobj.read()
        return len(content), sha_string(content)

    def _get_inventory(self):
        """Build up the inventory entry for the ChangesetTree.

        This need to be called before ever accessing self.inventory

        :return: A new Inventory holding one entry per id that has a path
            in the target tree; it is rebuilt on every call.
        :raises BzrError: If an entry has a kind other than directory,
            file or symlink, or a non-directory has no text size.
        """
        from os.path import dirname, basename

        assert self.base_tree is not None
        base_inv = self.base_tree.inventory
        root_id = base_inv.root.file_id
        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id)
        except TypeError:
            inv = Inventory()

        def add_entry(file_id):
            """Add the inventory entry of file_id to inv."""
            path = self.id2path(file_id)
            if path is None:
                return
            parent_path = dirname(path)
            if parent_path == u'':
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            name = basename(path)
            if kind == 'directory':
                ie = InventoryDirectory(file_id, name, parent_id)
            elif kind == 'file':
                ie = InventoryFile(file_id, name, parent_id)
            elif kind == 'symlink':
                ie = InventoryLink(file_id, name, parent_id)
            else:
                # Fail with a clear message instead of an unbound local
                raise BzrError('Unknown kind %r for file_id %r'
                               % (kind, file_id))
            ie.revision = revision_id

            if kind == 'directory':
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
            if (ie.text_size is None) and (kind != 'directory'):
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)

        # Entries are added in path order
        sorted_entries = self.sorted_path_id()
        for path, file_id in sorted_entries:
            if file_id == inv.root.file_id:
                continue
            add_entry(file_id)

        return inv

    # Have to overload the inherited inventory property
    # because _get_inventory is only called in the parent.
    # Reading the docs, property() objects do not use
    # overloading, they use the function as it was defined
    # at that instant
    inventory = property(_get_inventory)

    def __iter__(self):
        """Yield the file id of every entry of self.inventory."""
        for path, entry in self.inventory.iter_entries():
            yield entry.file_id

    def sorted_path_id(self):
        """Return the sorted (path, file_id) pairs of the target tree.

        Ids of the base tree without a path in the target tree are left out.
        """
        paths = list(self._new_id.items())
        for file_id in self.base_tree:
            path = self.id2path(file_id)
            if path is None:
                continue
            paths.append((path, file_id))
        paths.sort()
        return paths

909

910

def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    :param file_patch: The patch as one string; an empty string produces
        an empty file.
    :param original: Handed to iter_patched() as the text to patch.
    :return: An IterableFile over the patched lines.
    """
    from patches import iter_patched
    from iterablefile import IterableFile
    if file_patch == "":
        patched_lines = ()
    else:
        patched_lines = iter_patched(original, file_patch.splitlines(True))
    return IterableFile(patched_lines)

917