1
# Copyright (C) 2007, 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Helper functions for DirState.
19
This is the Pyrex (compiled C extension) implementation of the DirState helper functions.
25
from bzrlib import cache_utf8, errors, osutils
26
from bzrlib.dirstate import DirState, pack_stat
27
from bzrlib.osutils import pathjoin
30
# Give Pyrex some function definitions for it to understand.
31
# All of these are just hints to Pyrex, so that it can try to convert python
32
# objects into similar C objects. (such as PyInt => int).
33
# In anything defined 'cdef extern from XXX' the real C header will be
34
# imported, and the real definition will be used from there. So these are just
35
# hints, and do not need to match exactly to the C definitions.
38
ctypedef unsigned long size_t
40
cdef extern from "_dirstate_helpers_c.h":
44
cdef extern from "arpa/inet.h":
45
unsigned long htonl(unsigned long)
48
cdef extern from "stdlib.h":
49
unsigned long int strtoul(char *nptr, char **endptr, int base)
51
cdef extern from "stdio.h":
52
void printf(char *format, ...)
54
cdef extern from 'sys/stat.h':
60
# These functions allow us access to a bit of the 'bare metal' of python
61
# objects, rather than going through the object abstraction. (For example,
62
# PyList_Append, rather than getting the 'append' attribute of the object, and
63
# creating a tuple, and then using PyCallObject).
64
# Functions that return (or take) a void* are meant to grab a C PyObject*. This
65
# differs from the Pyrex 'object'. If you declare a variable as 'object' Pyrex
66
# will automatically Py_INCREF and Py_DECREF when appropriate. But for some
67
# inner loops, we don't need to do that at all, as the reference only lasts for
69
cdef extern from "Python.h":
70
ctypedef int Py_ssize_t
71
ctypedef struct PyObject:
73
int PyList_Append(object lst, object item) except -1
74
void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)
75
void *PyList_GetItem_void_void "PyList_GET_ITEM" (void * lst, int index)
76
int PyList_CheckExact(object)
78
void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
79
object PyTuple_GetItem_void_object "PyTuple_GET_ITEM" (void* tpl, int index)
81
char *PyString_AsString(object p)
82
char *PyString_AsString_obj "PyString_AsString" (PyObject *string)
83
char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)
84
object PyString_FromString(char *)
85
object PyString_FromStringAndSize(char *, Py_ssize_t)
86
int PyString_Size(object p)
87
int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)
88
int PyString_CheckExact(object p)
89
void Py_INCREF(object o)
90
void Py_DECREF(object o)
93
cdef extern from "string.h":
94
int strncmp(char *s1, char *s2, int len)
95
void *memchr(void *s, int c, size_t len)
96
int memcmp(void *b1, void *b2, size_t len)
97
# ??? memrchr is a GNU extension :(
98
# void *memrchr(void *s, int c, size_t len)
101
cdef void* _my_memrchr(void *s, int c, size_t n):
102
# memrchr seems to be a GNU extension, so we have to implement it ourselves
115
def _py_memrchr(s, c):
116
"""Just to expose _my_memrchr for testing.
118
:param s: The Python string to search
119
:param c: The character to search for
120
:return: The offset to the last instance of 'c' in s
127
_s = PyString_AsString(s)
128
length = PyString_Size(s)
130
_c = PyString_AsString(c)
131
assert PyString_Size(c) == 1,\
132
'Must be a single character string, not %s' % (c,)
133
found = _my_memrchr(_s, _c[0], length)
136
return <char*>found - <char*>_s
138
cdef object safe_string_from_size(char *s, Py_ssize_t size):
140
raise AssertionError(
141
'tried to create a string with an invalid size: %d @0x%x'
143
return PyString_FromStringAndSize(s, size)
146
cdef int _is_aligned(void *ptr):
147
"""Is this pointer aligned to an integer size offset?
149
:return: 1 if this pointer is aligned, 0 otherwise.
151
# Masking the address with sizeof(int)-1 keeps only its low bits; they are
# all zero exactly when the address is a multiple of sizeof(int).
# NOTE(review): this relies on sizeof(int) being a power of two -- confirm
# for any unusual target platform.
return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
154
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
155
cdef unsigned char *cur1
156
cdef unsigned char *cur2
157
cdef unsigned char *end1
158
cdef unsigned char *end2
164
if path1 == path2 and size1 == size2:
167
end1 = <unsigned char*>path1+size1
168
end2 = <unsigned char*>path2+size2
170
# Use 32-bit comparisons for the matching portion of the string.
171
# Almost all CPUs are faster at loading and comparing 32-bit integers,
172
# than they are at 8-bit integers.
173
# 99% of the time, these will be aligned, but in case they aren't just skip
175
if _is_aligned(path1) and _is_aligned(path2):
176
cur_int1 = <int*>path1
177
cur_int2 = <int*>path2
178
end_int1 = <int*>(path1 + size1 - (size1 % sizeof(int)))
179
end_int2 = <int*>(path2 + size2 - (size2 % sizeof(int)))
181
while cur_int1 < end_int1 and cur_int2 < end_int2:
182
if cur_int1[0] != cur_int2[0]:
184
cur_int1 = cur_int1 + 1
185
cur_int2 = cur_int2 + 1
187
cur1 = <unsigned char*>cur_int1
188
cur2 = <unsigned char*>cur_int2
190
cur1 = <unsigned char*>path1
191
cur2 = <unsigned char*>path2
193
while cur1 < end1 and cur2 < end2:
194
if cur1[0] == cur2[0]:
195
# This character matches, just go to the next one
199
# The current characters do not match
201
return -1 # Reached the end of path1 segment first
202
elif cur2[0] == c'/':
203
return 1 # Reached the end of path2 segment first
204
elif cur1[0] < cur2[0]:
209
# We reached the end of at least one of the strings
211
return 1 # Not at the end of cur1, must be at the end of cur2
213
return -1 # At the end of cur1, but not at cur2
214
# We reached the end of both strings
218
def cmp_by_dirs_c(path1, path2):
219
"""Compare two paths directory by directory.
221
This is equivalent to doing::
223
cmp(path1.split('/'), path2.split('/'))
225
The idea is that you should compare path components separately. This
226
differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
227
``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
229
:param path1: first path
230
:param path2: second path
231
:return: negative number if ``path1`` comes first,
232
0 if paths are equal,
233
and positive number if ``path2`` sorts first
235
if not PyString_CheckExact(path1):
236
raise TypeError("'path1' must be a plain string, not %s: %r"
237
% (type(path1), path1))
238
if not PyString_CheckExact(path2):
239
raise TypeError("'path2' must be a plain string, not %s: %r"
240
% (type(path2), path2))
241
return _cmp_by_dirs(PyString_AsString(path1),
242
PyString_Size(path1),
243
PyString_AsString(path2),
244
PyString_Size(path2))
247
def _cmp_path_by_dirblock_c(path1, path2):
248
"""Compare two paths based on what directory they are in.
250
This generates a sort order, such that all children of a directory are
251
sorted together, and grandchildren are in the same order as the
252
children appear. But all grandchildren come after all children.
254
In other words, all entries in a directory are sorted together, and
255
directories are sorted in cmp_by_dirs order.
257
:param path1: first path
258
:param path2: the second path
259
:return: negative number if ``path1`` comes first,
261
and a positive number if ``path2`` sorts first
263
if not PyString_CheckExact(path1):
264
raise TypeError("'path1' must be a plain string, not %s: %r"
265
% (type(path1), path1))
266
if not PyString_CheckExact(path2):
267
raise TypeError("'path2' must be a plain string, not %s: %r"
268
% (type(path2), path2))
269
return _cmp_path_by_dirblock(PyString_AsString(path1),
270
PyString_Size(path1),
271
PyString_AsString(path2),
272
PyString_Size(path2))
275
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
276
char *path2, int path2_len):
277
"""Compare two paths by what directory they are in.
279
see ``_cmp_path_by_dirblock_c`` for details.
282
cdef int dirname1_len
284
cdef int dirname2_len
286
cdef int basename1_len
288
cdef int basename2_len
292
if path1_len == 0 and path2_len == 0:
295
if path1 == path2 and path1_len == path2_len:
304
basename1 = <char*>_my_memrchr(path1, c'/', path1_len)
306
if basename1 == NULL:
308
basename1_len = path1_len
313
dirname1_len = basename1 - path1
314
basename1 = basename1 + 1
315
basename1_len = path1_len - dirname1_len - 1
317
basename2 = <char*>_my_memrchr(path2, c'/', path2_len)
319
if basename2 == NULL:
321
basename2_len = path2_len
326
dirname2_len = basename2 - path2
327
basename2 = basename2 + 1
328
basename2_len = path2_len - dirname2_len - 1
330
cmp_val = _cmp_by_dirs(dirname1, dirname1_len,
331
dirname2, dirname2_len)
335
cur_len = basename1_len
336
if basename2_len < basename1_len:
337
cur_len = basename2_len
339
cmp_val = memcmp(basename1, basename2, cur_len)
342
if basename1_len == basename2_len:
344
if basename1_len < basename2_len:
349
def _bisect_path_left_c(paths, path):
350
"""Return the index where to insert path into paths.
352
This uses a path-wise comparison so we get::
362
:param paths: A list of paths to search through
363
:param path: A single path to insert
364
:return: An offset where 'path' can be inserted.
365
:seealso: bisect.bisect_left
376
if not PyList_CheckExact(paths):
377
raise TypeError("you must pass a python list for 'paths' not: %s %r"
378
% (type(paths), paths))
379
if not PyString_CheckExact(path):
380
raise TypeError("you must pass a string for 'path' not: %s %r"
381
% (type(path), path))
386
path_cstr = PyString_AsString(path)
387
path_size = PyString_Size(path)
390
_mid = (_lo + _hi) / 2
391
cur = PyList_GetItem_object_void(paths, _mid)
392
cur_cstr = PyString_AS_STRING_void(cur)
393
cur_size = PyString_GET_SIZE_void(cur)
394
if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
401
def _bisect_path_right_c(paths, path):
402
"""Return the index where to insert path into paths.
404
This uses a path-wise comparison so we get::
414
:param paths: A list of paths to search through
415
:param path: A single path to insert
416
:return: An offset where 'path' can be inserted.
417
:seealso: bisect.bisect_right
428
if not PyList_CheckExact(paths):
429
raise TypeError("you must pass a python list for 'paths' not: %s %r"
430
% (type(paths), paths))
431
if not PyString_CheckExact(path):
432
raise TypeError("you must pass a string for 'path' not: %s %r"
433
% (type(path), path))
438
path_cstr = PyString_AsString(path)
439
path_size = PyString_Size(path)
442
_mid = (_lo + _hi) / 2
443
cur = PyList_GetItem_object_void(paths, _mid)
444
cur_cstr = PyString_AS_STRING_void(cur)
445
cur_size = PyString_GET_SIZE_void(cur)
446
if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
453
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
454
"""Return the index where to insert dirname into the dirblocks.
456
The return value idx is such that all directory blocks in dirblocks[:idx]
457
have names < dirname, and all blocks in dirblocks[idx:] have names >=
460
Optional args lo (default 0) and hi (default len(dirblocks)) bound the
461
slice of a to be searched.
466
cdef char *dirname_cstr
467
cdef int dirname_size
472
if not PyList_CheckExact(dirblocks):
473
raise TypeError("you must pass a python list for 'dirblocks' not: %s %r"
474
% (type(dirblocks), dirblocks))
475
if not PyString_CheckExact(dirname):
476
raise TypeError("you must pass a string for dirname not: %s %r"
477
% (type(dirname), dirname))
484
dirname_cstr = PyString_AsString(dirname)
485
dirname_size = PyString_Size(dirname)
488
_mid = (_lo + _hi) / 2
489
# Grab the dirname for the current dirblock
490
# cur = dirblocks[_mid][0]
491
cur = PyTuple_GetItem_void_void(
492
PyList_GetItem_object_void(dirblocks, _mid), 0)
493
cur_cstr = PyString_AS_STRING_void(cur)
494
cur_size = PyString_GET_SIZE_void(cur)
495
if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0:
503
"""Maintain the current location, and return fields as you parse them."""
505
cdef object state # The DirState object
506
cdef object text # The overall string object
507
cdef char *text_cstr # Pointer to the beginning of text
508
cdef int text_size # Length of text
510
cdef char *end_cstr # End of text
511
cdef char *cur_cstr # Pointer to the current record
512
cdef char *next # Pointer to the end of this record
514
def __init__(self, text, state):
517
self.text_cstr = PyString_AsString(text)
518
self.text_size = PyString_Size(text)
519
self.end_cstr = self.text_cstr + self.text_size
520
self.cur_cstr = self.text_cstr
522
cdef char *get_next(self, int *size) except NULL:
523
"""Return a pointer to the start of the next field."""
525
cdef Py_ssize_t extra_len
527
if self.cur_cstr == NULL:
528
raise AssertionError('get_next() called when cur_str is NULL')
529
elif self.cur_cstr >= self.end_cstr:
530
raise AssertionError('get_next() called when there are no chars'
533
self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr - next)
534
if self.cur_cstr == NULL:
535
extra_len = self.end_cstr - next
536
raise errors.DirstateCorrupt(self.state,
537
'failed to find trailing NULL (\\0).'
538
' Trailing garbage: %r'
539
% safe_string_from_size(next, extra_len))
540
size[0] = self.cur_cstr - next
541
self.cur_cstr = self.cur_cstr + 1
544
cdef object get_next_str(self):
545
"""Get the next field as a Python string."""
548
next = self.get_next(&size)
549
return safe_string_from_size(next, size)
551
cdef int _init(self) except -1:
552
"""Get the pointer ready.
554
This assumes that the dirstate header has already been read, and we
555
already have the dirblock string loaded into memory.
556
This just initializes our memory pointers, etc for parsing of the
561
# The first field should be an empty string left over from the Header
562
first = self.get_next(&size)
563
if first[0] != c'\0' and size == 0:
564
raise AssertionError('First character should be null not: %s'
568
cdef object _get_entry(self, int num_trees, void **p_current_dirname,
570
"""Extract the next entry.
572
This parses the next entry based on the current location in
574
Each entry can be considered a "row" in the total table. And each row
575
has a fixed number of columns. It is generally broken up into "key"
576
columns, then "current" columns, and then "parent" columns.
578
:param num_trees: How many parent trees need to be parsed
579
:param p_current_dirname: A pointer to the current PyString
580
representing the directory name.
581
We pass this in as a void * so that pyrex doesn't have to
582
increment/decrement the PyObject reference counter for each
584
We use a pointer so that _get_entry can update it with the new
586
:param new_block: This is to let the caller know that it needs to
587
create a new directory block to store the next entry.
589
cdef object path_name_file_id_key
590
cdef char *entry_size_cstr
591
cdef unsigned long int entry_size
592
cdef char* executable_cstr
593
cdef int is_executable
594
cdef char* dirname_cstr
599
cdef object fingerprint
602
# Read the 'key' information (dirname, name, file_id)
603
dirname_cstr = self.get_next(&cur_size)
604
# Check to see if we have started a new directory block.
605
# If so, then we need to create a new dirname PyString, so that it can
606
# be used in all of the tuples. This saves time and memory, by re-using
607
# the same object repeatedly.
609
# Do the cheap 'length of string' check first. If the string is a
610
# different length, then we *have* to be a different directory.
611
if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0])
612
or strncmp(dirname_cstr,
613
# Extract the char* from our current dirname string. We
614
# know it is a PyString, so we can use
615
# PyString_AS_STRING, we use the _void version because
616
# we are tricking Pyrex by using a void* rather than an
618
PyString_AS_STRING_void(p_current_dirname[0]),
620
dirname = safe_string_from_size(dirname_cstr, cur_size)
621
p_current_dirname[0] = <void*>dirname
626
# Build up the key that will be used.
627
# By using <object>(void *) Pyrex will automatically handle the
628
# Py_INCREF that we need.
629
path_name_file_id_key = (<object>p_current_dirname[0],
634
# Parse all of the per-tree information. current has the information in
635
# the same location as parent trees. The only difference is that 'info'
636
# is a 'packed_stat' for current, while it is a 'revision_id' for
638
# minikind, fingerprint, and info will be returned as regular python
640
# entry_size and is_executable will be parsed into a python Long and
641
# python Boolean, respectively.
642
# TODO: jam 20070718 Consider changing the entry_size conversion to
643
# prefer python Int when possible. They are generally faster to
644
# work with, and it will be rare that we have a file >2GB.
645
# Especially since this code is pretty much fixed at a max of
648
for i from 0 <= i < num_trees:
649
minikind = self.get_next_str()
650
fingerprint = self.get_next_str()
651
entry_size_cstr = self.get_next(&cur_size)
652
entry_size = strtoul(entry_size_cstr, NULL, 10)
653
executable_cstr = self.get_next(&cur_size)
654
is_executable = (executable_cstr[0] == c'y')
655
info = self.get_next_str()
656
PyList_Append(trees, (
658
fingerprint, # fingerprint
660
is_executable,# executable
661
info, # packed_stat or revision_id
664
# The returned tuple is (key, [trees])
665
ret = (path_name_file_id_key, trees)
666
# Ignore the trailing newline, but assert that it does exist, this
667
# ensures that we always finish parsing a line on an end-of-entry
669
trailing = self.get_next(&cur_size)
670
if cur_size != 1 or trailing[0] != c'\n':
671
raise errors.DirstateCorrupt(self.state,
672
'Bad parse, we expected to end on \\n, not: %d %s: %s'
673
% (cur_size, safe_string_from_size(trailing, cur_size),
677
def _parse_dirblocks(self):
678
"""Parse all dirblocks in the state file."""
680
cdef object current_block
682
cdef void * current_dirname
684
cdef int expected_entry_count
687
num_trees = self.state._num_present_parents() + 1
688
expected_entry_count = self.state._num_entries
690
# Ignore the first record
694
dirblocks = [('', current_block), ('', [])]
695
self.state._dirblocks = dirblocks
697
current_dirname = <void*>obj
701
# TODO: jam 2007-05-07 Consider pre-allocating some space for the
702
# members, and then growing and shrinking from there. If most
703
# directories have close to 10 entries in them, it would save a
704
# few mallocs if we default our list size to something
705
# reasonable. Or we could malloc it to something large (100 or
706
# so), and then truncate. That would give us a malloc + realloc,
707
# rather than lots of reallocs.
708
while self.cur_cstr < self.end_cstr:
709
entry = self._get_entry(num_trees, &current_dirname, &new_block)
711
# new block - different dirname
713
PyList_Append(dirblocks,
714
(<object>current_dirname, current_block))
715
PyList_Append(current_block, entry)
716
entry_count = entry_count + 1
717
if entry_count != expected_entry_count:
718
raise errors.DirstateCorrupt(self.state,
719
'We read the wrong number of entries.'
720
' We expected to read %s, but read %s'
721
% (expected_entry_count, entry_count))
722
self.state._split_root_dirblock_into_contents()
725
def _read_dirblocks_c(state):
726
"""Read in the dirblocks for the given DirState object.
728
This is tightly bound to the DirState internal representation. It should be
729
thought of as a member function, which is only separated out so that we can
730
re-write it in pyrex.
732
:param state: A DirState object.
734
:postcondition: The dirblocks will be loaded into the appropriate fields in
737
state._state_file.seek(state._end_of_header)
738
text = state._state_file.read()
739
# TODO: check the crc checksums. crc_measured = zlib.crc32(text)
741
reader = Reader(text, state)
743
reader._parse_dirblocks()
744
state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
747
cdef int minikind_from_mode(int mode):
748
# in order of frequency:
759
_encode = binascii.b2a_base64
761
from struct import pack
762
cdef _pack_stat(stat_value):
763
"""Return a string representing the stat value's key fields.
765
:param stat_value: A stat object with st_size, st_mtime, st_ctime, st_dev,
766
st_ino and st_mode fields.
768
cdef char result[6*4] # 6 long ints
770
aliased = <int *>result
771
# Every field goes through htonl() before it is stored, so the packed
# bytes are in network byte order.
aliased[0] = htonl(stat_value.st_size)
772
# int() truncates the timestamps to whole seconds.
aliased[1] = htonl(int(stat_value.st_mtime))
773
aliased[2] = htonl(int(stat_value.st_ctime))
774
aliased[3] = htonl(stat_value.st_dev)
775
# Only the low 32 bits of the inode number are kept.
aliased[4] = htonl(stat_value.st_ino & 0xFFFFFFFF)
776
aliased[5] = htonl(stat_value.st_mode)
777
packed = PyString_FromStringAndSize(result, 6*4)
778
# _encode is binascii.b2a_base64, whose output ends with a newline; the
# slice drops that final character.
return _encode(packed)[:-1]
781
def update_entry(self, entry, abspath, stat_value):
782
"""Update the entry based on what is actually on disk.
784
:param entry: This is the dirblock entry for the file in question.
785
:param abspath: The path on disk for this file.
786
:param stat_value: (optional) if we already have done a stat on the
788
:return: The sha1 hexdigest of the file (40 bytes) or link target of a
791
return _update_entry(self, entry, abspath, stat_value)
793
cdef _update_entry(self, entry, abspath, stat_value):
794
"""Update the entry based on what is actually on disk.
796
:param entry: This is the dirblock entry for the file in question.
797
:param abspath: The path on disk for this file.
798
:param stat_value: (optional) if we already have done a stat on the
800
:return: The sha1 hexdigest of the file (40 bytes) or link target of a
803
# TODO - require pyrex 0.8, then use a pyd file to define access to the _st
804
# mode of the compiled stat objects.
805
cdef int minikind, saved_minikind
807
# pyrex 0.9.7 would allow cdef list details_list, and direct access rather
808
# than PyList_GetItem_void_void below
809
minikind = minikind_from_mode(stat_value.st_mode)
812
packed_stat = _pack_stat(stat_value)
813
details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
814
saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
815
saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
816
saved_file_size = PyTuple_GetItem_void_object(details, 2)
817
saved_executable = PyTuple_GetItem_void_object(details, 3)
818
saved_packed_stat = PyTuple_GetItem_void_object(details, 4)
819
# Deal with pyrex decrefing the objects
820
Py_INCREF(saved_link_or_sha1)
821
Py_INCREF(saved_file_size)
822
Py_INCREF(saved_executable)
823
Py_INCREF(saved_packed_stat)
824
#(saved_minikind, saved_link_or_sha1, saved_file_size,
825
# saved_executable, saved_packed_stat) = entry[1][0]
827
if (minikind == saved_minikind
828
and packed_stat == saved_packed_stat):
829
# The stat hasn't changed since we saved, so we can re-use the
834
# size should also be in packed_stat
835
if saved_file_size == stat_value.st_size:
836
return saved_link_or_sha1
838
# If we have gotten this far, that means that we need to actually
839
# process this entry.
842
link_or_sha1 = self._sha1_file(abspath)
843
executable = self._is_executable(stat_value.st_mode,
845
if self._cutoff_time is None:
846
self._sha_cutoff_time()
847
if (stat_value.st_mtime < self._cutoff_time
848
and stat_value.st_ctime < self._cutoff_time):
849
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
850
executable, packed_stat)
852
entry[1][0] = ('f', '', stat_value.st_size,
853
executable, DirState.NULLSTAT)
854
elif minikind == c'd':
856
entry[1][0] = ('d', '', 0, False, packed_stat)
857
if saved_minikind != c'd':
858
# This changed from something into a directory. Make sure we
859
# have a directory block for it. This doesn't happen very
860
# often, so this doesn't have to be super fast.
861
block_index, entry_index, dir_present, file_present = \
862
self._get_block_entry_index(entry[0][0], entry[0][1], 0)
863
self._ensure_block(block_index, entry_index,
864
osutils.pathjoin(entry[0][0], entry[0][1]))
865
elif minikind == c'l':
866
link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
867
if self._cutoff_time is None:
868
self._sha_cutoff_time()
869
if (stat_value.st_mtime < self._cutoff_time
870
and stat_value.st_ctime < self._cutoff_time):
871
entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
874
entry[1][0] = ('l', '', stat_value.st_size,
875
False, DirState.NULLSTAT)
876
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
880
cdef char _minikind_from_string(object string):
881
"""Convert a python string to a char."""
882
return PyString_AsString(string)[0]
885
cdef object _kind_absent
886
cdef object _kind_file
887
cdef object _kind_directory
888
cdef object _kind_symlink
889
cdef object _kind_relocated
890
cdef object _kind_tree_reference
891
_kind_absent = "absent"
893
_kind_directory = "directory"
894
_kind_symlink = "symlink"
895
_kind_relocated = "relocated"
896
_kind_tree_reference = "tree-reference"
899
cdef object _minikind_to_kind(char minikind):
900
"""Create a string kind for minikind."""
901
cdef char _minikind[1]
904
elif minikind == c'd':
905
return _kind_directory
906
elif minikind == c'a':
908
elif minikind == c'r':
909
return _kind_relocated
910
elif minikind == c'l':
912
elif minikind == c't':
913
return _kind_tree_reference
914
_minikind[0] = minikind
915
raise KeyError(PyString_FromStringAndSize(_minikind, 1))
918
cdef int _versioned_minikind(char minikind):
919
"""Return non-zero if minikind is in fltd"""
920
return (minikind == c'f' or
926
cdef class ProcessEntryC:
928
cdef object old_dirname_to_file_id # dict
929
cdef object new_dirname_to_file_id # dict
930
cdef readonly object uninteresting
931
cdef object last_source_parent
932
cdef object last_target_parent
933
cdef object include_unchanged
934
cdef object use_filesystem_for_exec
935
cdef object utf8_decode
936
cdef readonly object searched_specific_files
937
cdef object search_specific_files
939
def __init__(self, include_unchanged, use_filesystem_for_exec, search_specific_files):
940
self.old_dirname_to_file_id = {}
941
self.new_dirname_to_file_id = {}
942
# Just a sentinel, so that _process_entry can say that this
943
# record is handled, but isn't interesting to process (unchanged)
944
self.uninteresting = object()
945
# Using a list so that we can access the values and change them in
946
# nested scope. Each one is [path, file_id, entry]
947
self.last_source_parent = [None, None]
948
self.last_target_parent = [None, None]
949
self.include_unchanged = include_unchanged
950
self.use_filesystem_for_exec = use_filesystem_for_exec
951
self.utf8_decode = cache_utf8._utf8_decode
952
# For all search_indexes in each path at or under each element of
953
# search_specific_files, if the detail is relocated: add the id, and add the
954
# relocated path as one to search if it's not searched already. If the
955
# detail is not relocated, add the id.
956
self.searched_specific_files = set()
957
self.search_specific_files = search_specific_files
959
def _process_entry(self, entry, path_info, source_index, int target_index, state):
960
"""Compare an entry and real disk to generate delta information.
962
:param path_info: top_relpath, basename, kind, lstat, abspath for
963
the path of entry. If None, then the path is considered absent.
964
(Perhaps we should pass in a concrete entry for this ?)
965
Basename is returned as a utf8 string because we expect this
966
tuple will be ignored, and don't want to take the time to
968
:return: None if these don't match
969
A tuple of information about the change, or
970
the object 'uninteresting' if these match, but are
973
cdef char target_minikind
974
cdef char source_minikind
976
cdef int content_change
978
if source_index is None:
979
source_details = DirState.NULL_PARENT_DETAILS
981
source_details = entry[1][source_index]
982
target_details = entry[1][target_index]
983
target_minikind = _minikind_from_string(target_details[0])
984
if path_info is not None and _versioned_minikind(target_minikind):
985
if target_index != 0:
986
raise AssertionError("Unsupported target index %d" % target_index)
987
link_or_sha1 = _update_entry(state, entry, path_info[4], path_info[3])
988
# The entry may have been modified by update_entry
989
target_details = entry[1][target_index]
990
target_minikind = _minikind_from_string(target_details[0])
993
# the rest of this function is 0.3 seconds on 50K paths, or
994
# 0.000006 seconds per call.
995
source_minikind = _minikind_from_string(source_details[0])
996
if ((_versioned_minikind(source_minikind) or source_minikind == c'r')
997
and _versioned_minikind(target_minikind)):
998
# claimed content in both: diff
999
# r | fdlt | | add source to search, add id path move and perform
1000
# | | | diff check on source-target
1001
# r | fdlt | a | dangling file that was present in the basis.
1003
if source_minikind != c'r':
1004
old_dirname = entry[0][0]
1005
old_basename = entry[0][1]
1006
old_path = path = None
1008
# add the source to the search path to find any children it
1009
# has. TODO ? : only add if it is a container ?
1010
if not osutils.is_inside_any(self.searched_specific_files,
1012
self.search_specific_files.add(source_details[1])
1013
# generate the old path; this is needed for stating later
1015
old_path = source_details[1]
1016
old_dirname, old_basename = os.path.split(old_path)
1017
path = pathjoin(entry[0][0], entry[0][1])
1018
old_entry = state._get_entry(source_index,
1020
# update the source details variable to be the real
1022
if old_entry == (None, None):
1023
raise errors.CorruptDirstate(state._filename,
1024
"entry '%s/%s' is considered renamed from %r"
1025
" but source does not exist\n"
1026
"entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
1027
source_details = old_entry[1][source_index]
1028
source_minikind = _minikind_from_string(source_details[0])
1029
if path_info is None:
1030
# the file is missing on disk, show as removed.
1035
# source and target are both versioned and disk file is present.
1036
target_kind = path_info[2]
1037
if target_kind == 'directory':
1039
old_path = path = pathjoin(old_dirname, old_basename)
1040
file_id = entry[0][2]
1041
self.new_dirname_to_file_id[path] = file_id
1042
if source_minikind != c'd':
1045
# directories have no fingerprint
1048
elif target_kind == 'file':
1049
if source_minikind != c'f':
1052
# We could check the size, but we already have the
1054
content_change = (link_or_sha1 != source_details[1])
1055
# Target details is updated at update_entry time
1056
if self.use_filesystem_for_exec:
1057
# We don't need S_ISREG here, because we are sure
1058
# we are dealing with a file.
1059
target_exec = bool(S_IXUSR & path_info[3].st_mode)
1061
target_exec = target_details[3]
1062
elif target_kind == 'symlink':
1063
if source_minikind != c'l':
1066
content_change = (link_or_sha1 != source_details[1])
1068
elif target_kind == 'tree-reference':
1069
if source_minikind != c't':
1075
raise Exception, "unknown kind %s" % path_info[2]
1076
if source_minikind == c'd':
1078
old_path = path = pathjoin(old_dirname, old_basename)
1080
file_id = entry[0][2]
1081
self.old_dirname_to_file_id[old_path] = file_id
1082
# parent id is the entry for the path in the target tree
1083
if old_dirname == self.last_source_parent[0]:
1084
source_parent_id = self.last_source_parent[1]
1087
source_parent_id = self.old_dirname_to_file_id[old_dirname]
1089
source_parent_entry = state._get_entry(source_index,
1090
path_utf8=old_dirname)
1091
source_parent_id = source_parent_entry[0][2]
1092
if source_parent_id == entry[0][2]:
1093
# This is the root, so the parent is None
1094
source_parent_id = None
1096
self.last_source_parent[0] = old_dirname
1097
self.last_source_parent[1] = source_parent_id
1098
new_dirname = entry[0][0]
1099
if new_dirname == self.last_target_parent[0]:
1100
target_parent_id = self.last_target_parent[1]
1103
target_parent_id = self.new_dirname_to_file_id[new_dirname]
1105
# TODO: We don't always need to do the lookup, because the
1106
# parent entry will be the same as the source entry.
1107
target_parent_entry = state._get_entry(target_index,
1108
path_utf8=new_dirname)
1109
if target_parent_entry == (None, None):
1110
raise AssertionError(
1111
"Could not find target parent in wt: %s\nparent of: %s"
1112
% (new_dirname, entry))
1113
target_parent_id = target_parent_entry[0][2]
1114
if target_parent_id == entry[0][2]:
1115
# This is the root, so the parent is None
1116
target_parent_id = None
1118
self.last_target_parent[0] = new_dirname
1119
self.last_target_parent[1] = target_parent_id
1121
source_exec = source_details[3]
1122
if (self.include_unchanged
1124
or source_parent_id != target_parent_id
1125
or old_basename != entry[0][1]
1126
or source_exec != target_exec
1128
if old_path is None:
1129
old_path = path = pathjoin(old_dirname, old_basename)
1130
old_path_u = self.utf8_decode(old_path)[0]
1133
old_path_u = self.utf8_decode(old_path)[0]
1134
if old_path == path:
1137
path_u = self.utf8_decode(path)[0]
1138
source_kind = _minikind_to_kind(source_minikind)
1139
return (entry[0][2],
1140
(old_path_u, path_u),
1143
(source_parent_id, target_parent_id),
1144
(self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1145
(source_kind, target_kind),
1146
(source_exec, target_exec))
1148
return self.uninteresting
1149
elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1150
# looks like a new file
1151
path = pathjoin(entry[0][0], entry[0][1])
1152
# parent id is the entry for the path in the target tree
1153
# TODO: these are the same for an entire directory: cache em.
1154
parent_id = state._get_entry(target_index,
1155
path_utf8=entry[0][0])[0][2]
1156
if parent_id == entry[0][2]:
1158
if path_info is not None:
1160
if self.use_filesystem_for_exec:
1161
# We need S_ISREG here, because we aren't sure if this
1164
S_ISREG(path_info[3].st_mode)
1165
and S_IXUSR & path_info[3].st_mode)
1167
target_exec = target_details[3]
1168
return (entry[0][2],
1169
(None, self.utf8_decode(path)[0]),
1173
(None, self.utf8_decode(entry[0][1])[0]),
1174
(None, path_info[2]),
1175
(None, target_exec))
1177
# It's a missing file; report it as such.
1178
return (entry[0][2],
1179
(None, self.utf8_decode(path)[0]),
1183
(None, self.utf8_decode(entry[0][1])[0]),
1186
elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1187
# unversioned, possibly, or possibly not deleted: we don't care.
1188
# if it's still on disk, *and* there's no other entry at this
1189
# path [we dont know this in this routine at the moment -
1190
# perhaps we should change this - then it would be an unknown.
1191
old_path = pathjoin(entry[0][0], entry[0][1])
1192
# parent id is the entry for the path in the target tree
1193
parent_id = state._get_entry(source_index, path_utf8=entry[0][0])[0][2]
1194
if parent_id == entry[0][2]:
1196
return (entry[0][2],
1197
(self.utf8_decode(old_path)[0], None),
1201
(self.utf8_decode(entry[0][1])[0], None),
1202
(_minikind_to_kind(source_minikind), None),
1203
(source_details[3], None))
1204
elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1205
# a rename; could be a true rename, or a rename inherited from
1206
# a renamed parent. TODO: handle this efficiently. Its not
1207
# common case to rename dirs though, so a correct but slow
1208
# implementation will do.
1209
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1210
self.search_specific_files.add(target_details[1])
1211
elif ((source_minikind == c'r' or source_minikind == c'a') and
1212
(target_minikind == c'r' or target_minikind == c'a')):
1213
# neither of the selected trees contain this file,
1214
# so skip over it. This is not currently directly tested, but
1215
# is indirectly via test_too_much.TestCommands.test_conflicts.
1218
raise AssertionError("don't know how to compare "
1219
"source_minikind=%r, target_minikind=%r"
1220
% (source_minikind, target_minikind))
1221
## import pdb;pdb.set_trace()