/brz/remove-bazaar : revision 2255.6.7

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

Committer: Aaron Bentley
Date: 2007-02-24 16:11:32 UTC
mfrom: (2255.2.129 dirstate)
mto: (2255.2.152 dirstate)
mto: This revision was merged to the branch mainline in revision 2322.
Revision ID: aaron.bentley@utoronto.ca-20070224161132-ymdyp4vnbab7ewfe

Merge from dirstate

files modified:
bzrlib/dirstate.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tree.py

bzrlib/workingtree_4.py

Show diffs side-by-side

added added

removed removed

bzrlib/dirstate.py

214

)

215

216

217

class _Bisector(object):

218

"""This just keeps track of information as we are bisecting."""

219

220

217

221

class DirState(object):

218

222

"""Record directory and metadata state for fast access.

219

223

231

235

# TODO: jam 20070221 Make sure we handle when there are duplicated records

232

236

# (like when we remove + add the same path, or we have a rename)

233

237

# TODO: jam 20070221 Figure out what to do if we have a record that exceeds

234

# the BISECT_PAGE_SIZE.

238

# the BISECT_PAGE_SIZE. For now, we just have to make it large enough

239

# that we are sure a single record will always fit.

235

240

BISECT_PAGE_SIZE = 4096

236

241

237

242

NOT_IN_MEMORY = 0

276

281

self._state_file = None

277

282

self._filename = path

278

283

self._lock_token = None

284

self._end_of_header = None

285

self._bisect_page_size = DirState.BISECT_PAGE_SIZE

279

286

280

287

def add(self, path, file_id, kind, stat, link_or_sha1):

281

288

"""Add a path to be tracked.

353

360

self._ensure_block(block_index, entry_index, utf8path)

354

361

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

355

362

363

def _bisect(self, dir_name_list):

364

"""Bisect through the disk structure for specific rows."""

365

self._requires_lock()

366

# We need the file pointer to be right after the initial header block

367

self._read_header_if_needed()

368

# If _dirblock_state was in memory, we should just return info from

369

# there, this function is only meant to handle when we want to read

370

# part of the disk.

371

assert self._dirblock_state == DirState.NOT_IN_MEMORY

372

373

# The disk representation is generally info + '\0\n\0' at the end. But

374

# for bisecting, it is easier to treat this as '\0' + info + '\0\n'

375

# Because it means we can sync on the '\n'

376

state_file = self._state_file

377

file_size = os.fstat(state_file.fileno()).st_size

378

# We end up with 2 extra fields, we should have a trailing '\n' to

379

# ensure that we read the whole record, and we should have a precursur

380

# '' which ensures that we start after the previous '\n'

381

entry_field_count = self._fields_per_entry() + 1

382

383

low = self._end_of_header

384

high = file_size - 1 # Ignore the final '\0'

385

# Map from (dir, name) => entry

386

found = {}

387

388

# Avoid infinite seeking

389

max_count = 30*len(dir_name_list)

390

count = 0

391

# pending is a list of places to look.

392

# each entry is a tuple of low, high, dir_names

393

# low -> the first byte offset to read (inclusive)

394

# high -> the last byte offset (inclusive)

395

# dir_names -> The list of (dir, name) pairs that should be found in

396

# the [low, high] range

397

pending = [(low, high, dir_name_list)]

398

399

page_size = self._bisect_page_size

400

401

fields_to_entry = self._get_fields_to_entry()

402

403

while pending:

404

low, high, cur_files = pending.pop()

405

406

if not cur_files:

407

# No files to look for

408

continue

409

410

if low >= high:

411

# Did not find cur_files, these will be returned as None

412

# However, other logic should probably prevent this from ever

413

# happening.

414

continue

415

416

count += 1

417

if count > max_count:

418

raise errors.BzrError('Too many seeks, most likely a bug.')

419

420

mid = max(low, (low+high-page_size)/2)

421

422

state_file.seek(mid)

423

# limit the read size, so we don't end up reading data that we have

424

# already read.

425

read_size = min(page_size, (high-mid)+1)

426

block = state_file.read(read_size)

427

428

start = mid

429

entries = block.split('\n')

430

431

if len(entries) < 2:

432

# We didn't find a '\n', so we cannot have found any records.

433

# So put this range back and try again. But we know we have to

434

# increase the page size, because a single read did not contain

435

# a record break (so records must be larger than page_size)

436

page_size *= 2

437

pending.append((low, high, cur_files))

438

continue

439

440

# Check the first and last entries, in case they are partial, or if

441

# we don't care about the rest of this page

442

first_entry_num = 0

443

first_fields = entries[0].split('\0')

444

if len(first_fields) < entry_field_count:

445

# We didn't get the complete first entry

446

# so move start, and grab the next, which

447

# should be a full entry

448

start += len(entries[0])+1

449

first_fields = entries[1].split('\0')

450

first_entry_num = 1

451

452

if len(first_fields) <= 2:

453

# We didn't even get a filename here... what do we do?

454

# Try a large page size and repeat this query

455

page_size *= 2

456

pending.append((low, high, cur_files))

457

continue

458

else:

459

# Find what entries we are looking for, which occur before and

460

# after this first record.

461

after = start

462

first_dir_name = (first_fields[1], first_fields[2])

463

first_loc = bisect.bisect_left(cur_files, first_dir_name)

464

465

# These exist before the current location

466

pre = cur_files[:first_loc]

467

# These occur after the current location, which may be in the

468

# data we read, or might be after the last entry

469

post = cur_files[first_loc:]

470

471

if post and len(first_fields) >= entry_field_count:

472

# We have files after the first entry

473

474

# Parse the last entry

475

last_entry_num = len(entries)-1

476

last_fields = entries[last_entry_num].split('\0')

477

if len(last_fields) < entry_field_count:

478

# The very last hunk was not complete,

479

# read the previous hunk

480

after = mid + len(block) - len(entries[-1])

481

last_entry_num -= 1

482

last_fields = entries[last_entry_num].split('\0')

483

else:

484

after = mid + len(block)

485

486

last_dir_name = (last_fields[1], last_fields[2])

487

last_loc = bisect.bisect_right(post, last_dir_name)

488

489

middle_files = post[:last_loc]

490

post = post[last_loc:]

491

492

if middle_files:

493

# We have files that should occur in this block

494

# (>= first, <= last)

495

# Either we will find them here, or we can mark them as

496

# missing.

497

498

if middle_files[0] == first_dir_name:

499

# We might need to go before this location

500

pre.append(first_dir_name)

501

if middle_files[-1] == last_dir_name:

502

post.insert(0, last_dir_name)

503

504

# Find out what paths we have

505

paths = {first_dir_name:[first_fields]}

506

# last_dir_name might == first_dir_name so we need to be

507

# careful if we should append rather than overwrite

508

if last_entry_num != first_entry_num:

509

paths.setdefault(last_dir_name, []).append(last_fields)

510

for num in xrange(first_entry_num+1, last_entry_num):

511

# TODO: jam 20070223 We are already splitting here, so

512

# shouldn't we just split the whole thing rather

513

# than doing the split again in add_one_record?

514

fields = entries[num].split('\0')

515

dir_name = (fields[1], fields[2])

516

paths.setdefault(dir_name, []).append(fields)

517

518

for dir_name in middle_files:

519

for fields in paths.get(dir_name, []):

520

# offset by 1 because of the opening '\0'

521

# consider changing fields_to_entry to avoid the

522

# extra list slice

523

entry = fields_to_entry(fields[1:])

524

found.setdefault(dir_name, []).append(entry)

525

526

# Now we have split up everything into pre, middle, and post, and

527

# we have handled everything that fell in 'middle'.

528

# We add 'post' first, so that we prefer to seek towards the

529

# beginning, so that we will tend to go as early as we need, and

530

# then only seek forward after that.

531

if post:

532

pending.append((after, high, post))

533

if pre:

534

pending.append((low, start-1, pre))

535

536

return [found.get(dir_name, None) for dir_name in dir_name_list]

537

356

538

def _empty_parent_info(self):

357

539

return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -

358

540

len(self._ghosts))

459

641

entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]

460

642

return '\0'.join(entire_entry)

461

643

462

def _fields_per_row(self):

644

def _fields_per_entry(self):

463

645

"""How many null separated fields should be in each entry row.

464

646

465

647

Each line now has an extra '\n' field which is not used

894

1076

# + newline

895

1077

num_present_parents = self._num_present_parents()

896

1078

tree_count = 1 + num_present_parents

897

entry_size = self._fields_per_row()

1079

entry_size = self._fields_per_entry()

898

1080

expected_field_count = entry_size * self._num_entries

899

1081

if len(fields) - cur > expected_field_count:

900

1082

fields = fields[:expected_field_count + cur]

986

1168

assert num_ghosts == len(info)-3, 'incorrect ghost info line'

987

1169

self._ghosts = info[2:-1]

988

1170

self._header_state = DirState.IN_MEMORY_UNMODIFIED

1171

self._end_of_header = self._state_file.tell()

989

1172

990

1173

def _read_header_if_needed(self):

991

1174

"""Read the header of the dirstate file if needed."""

Older »