        self._ensure_block(block_index, entry_index, utf8path)
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED

    def _bisect(self, dir_name_list):
        """Bisect through the disk structure for specific rows.

        :param dir_name_list: A list of (dir, name) pairs to look up.
        :return: A list, parallel to dir_name_list, where each item is a list
            of the entries found for that (dir, name) pair, or None if the
            pair was not found.
        """
        self._requires_lock()
        # We need the file pointer to be right after the initial header block
        self._read_header_if_needed()
        # If _dirblock_state was in memory, we should just return info from
        # there, this function is only meant to handle when we want to read
        # without pulling the whole file into memory.
        assert self._dirblock_state == DirState.NOT_IN_MEMORY

        # The disk representation is generally info + '\0\n\0' at the end. But
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
        # Because it means we can sync on the '\n'
        state_file = self._state_file
        file_size = os.fstat(state_file.fileno()).st_size
        # We end up with 2 extra fields, we should have a trailing '\n' to
        # ensure that we read the whole record, and we should have a precursor
        # '' which ensures that we start after the previous '\n'
        entry_field_count = self._fields_per_entry() + 1
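        # Note: after splitting a page on '\n' and then a complete row on
        # '\0', the row is assumed to look like ['', dirname, basename,
        # <remaining per-entry fields>]; that precursor '' is why the code
        # below keys on fields[1] and fields[2].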

        low = self._end_of_header
        high = file_size - 1 # Ignore the final '\0'
        # Map from (dir, name) => entry
        found = {}

        # Avoid infinite seeking
        max_count = 30*len(dir_name_list)
        count = 0
        # pending is a list of places to look.
        # each entry is a tuple of low, high, dir_names
        #   low -> the first byte offset to read (inclusive)
        #   high -> the last byte offset (inclusive)
        #   dir_names -> The list of (dir, name) pairs that should be found in
        #                the [low, high] range
        pending = [(low, high, dir_name_list)]
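        # Note: the bisect_left/bisect_right calls below only give correct
        # answers if each cur_files list is sorted, so dir_name_list is
        # presumably expected to be supplied in sorted (dir, name) order.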

        page_size = self._bisect_page_size

        fields_to_entry = self._get_fields_to_entry()

        while pending:
            low, high, cur_files = pending.pop()

            if not cur_files:
                # No files to look for
                continue

            if low >= high:
                # Did not find cur_files, these will be returned as None
                # However, other logic should probably prevent this from ever
                # happening.
                continue

            count += 1
            if count > max_count:
                raise errors.BzrError('Too many seeks, most likely a bug.')

            mid = max(low, (low+high-page_size)/2)
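            # (Biasing mid down by page_size means the page read below,
            # roughly [mid, mid+page_size), ends up centred on (low+high)/2
            # rather than starting there.)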
            state_file.seek(mid)
            # limit the read size, so we don't end up reading data that we have
            # already read
            read_size = min(page_size, (high-mid)+1)
            block = state_file.read(read_size)

            start = mid
            entries = block.split('\n')

            if len(entries) < 2:
                # We didn't find a '\n', so we cannot have found any records.
                # So put this range back and try again. But we know we have to
                # increase the page size, because a single read did not contain
                # a record break (so records must be larger than page_size)
                page_size *= 2
                pending.append((low, high, cur_files))
                continue

            # Check the first and last entries, in case they are partial, or if
            # we don't care about the rest of this page
            first_entry_num = 0
            first_fields = entries[0].split('\0')
            if len(first_fields) < entry_field_count:
                # We didn't get the complete first entry
                # so move start, and grab the next, which
                # should be a full entry
                start += len(entries[0])+1
                first_fields = entries[1].split('\0')
                first_entry_num = 1
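
            # 'start' is assumed to now be the byte offset of the first
            # complete record in this page, with first_fields holding its
            # split fields.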

            if len(first_fields) <= 2:
                # We didn't even get a filename here... what do we do?
                # Try a larger page size and repeat this query
                page_size *= 2
                pending.append((low, high, cur_files))
                continue
            else:
                # Find what entries we are looking for, which occur before and
                # after this first record.
                after = start
                first_dir_name = (first_fields[1], first_fields[2])
                first_loc = bisect.bisect_left(cur_files, first_dir_name)

                # These exist before the current location
                pre = cur_files[:first_loc]
                # These occur after the current location, which may be in the
                # data we read, or might be after the last entry
                post = cur_files[first_loc:]
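
            # From here on the requested names are handled in three groups:
            # 'pre' (before this page), the names matched inside this page,
            # and 'post' (after it); pre and post get pushed back onto
            # 'pending' at the end of the loop body.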

            if post and len(first_fields) >= entry_field_count:
                # We have files after the first entry

                # Parse the last entry
                last_entry_num = len(entries)-1
                last_fields = entries[last_entry_num].split('\0')
                if len(last_fields) < entry_field_count:
                    # The very last hunk was not complete,
                    # read the previous hunk
                    after = mid + len(block) - len(entries[-1])
                    last_entry_num -= 1
                    last_fields = entries[last_entry_num].split('\0')
                else:
                    after = mid + len(block)
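
                # 'after' now marks the first byte past what this page fully
                # covered; any names still in 'post' are searched from there
                # (see the pending.append near the end of the loop).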

                last_dir_name = (last_fields[1], last_fields[2])
                last_loc = bisect.bisect_right(post, last_dir_name)

                middle_files = post[:last_loc]
                post = post[last_loc:]

                if middle_files:
                    # We have files that should occur in this block
                    # (>= first, <= last)
                    # Either we will find them here, or we can mark them as
                    # missing.

                    if middle_files[0] == first_dir_name:
                        # We might need to go before this location
                        pre.append(first_dir_name)
                    if middle_files[-1] == last_dir_name:
                        post.insert(0, last_dir_name)
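                    # (A single (dir, name) pair can map to several entries,
                    # so a match at the very edge of this page may continue
                    # in bytes we did not read; re-queueing the boundary
                    # names in pre/post presumably covers that case.)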

                    # Find out what paths we have
                    paths = {first_dir_name:[first_fields]}
                    # last_dir_name might == first_dir_name so we need to be
                    # careful if we should append rather than overwrite
                    if last_entry_num != first_entry_num:
                        paths.setdefault(last_dir_name, []).append(last_fields)
                    for num in xrange(first_entry_num+1, last_entry_num):
                        # TODO: jam 20070223 We are already splitting here, so
                        #       shouldn't we just split the whole thing rather
                        #       than doing the split again in add_one_record?
                        fields = entries[num].split('\0')
                        dir_name = (fields[1], fields[2])
                        paths.setdefault(dir_name, []).append(fields)

                    for dir_name in middle_files:
                        for fields in paths.get(dir_name, []):
                            # offset by 1 because of the opening '\0'
                            # consider changing fields_to_entry to avoid the
                            # extra slicing here
                            entry = fields_to_entry(fields[1:])
                            found.setdefault(dir_name, []).append(entry)

            # Now we have split up everything into pre, middle, and post, and
            # we have handled everything that fell in 'middle'.
            # We add 'post' first, so that we prefer to seek towards the
            # beginning, so that we will tend to go as early as we need, and
            # then only seek forward after that.
            if post:
                pending.append((after, high, post))
            if pre:
                pending.append((low, start-1, pre))

        return [found.get(dir_name, None) for dir_name in dir_name_list]
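
        # Hypothetical usage sketch (the lookup keys are illustrative only):
        #   results = state._bisect([('', 'a-file'), ('subdir', 'b-file')])
        #   results[0] is the list of entries for ('', 'a-file'), or None
        #   results[1] is the list of entries for ('subdir', 'b-file'), or None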

    def _empty_parent_info(self):
        return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
                                                 len(self._ghosts))