119
131
_directory_kind = 'directory'
122
stat.S_IFDIR:_directory_kind,
123
stat.S_IFCHR:'chardev',
124
stat.S_IFBLK:'block',
127
stat.S_IFLNK:'symlink',
128
stat.S_IFSOCK:'socket',
132
def file_kind_from_stat_mode(stat_mode, _formats=_formats, _unknown='unknown'):
133
"""Generate a file kind from a stat mode. This is used in walkdirs.
135
Its performance is critical: Do not mutate without careful benchmarking.
138
return _formats[stat_mode & 0170000]
143
def file_kind(f, _lstat=os.lstat, _mapper=file_kind_from_stat_mode):
145
return _mapper(_lstat(f).st_mode)
147
if getattr(e, 'errno', None) in (errno.ENOENT, errno.ENOTDIR):
148
raise errors.NoSuchFile(f)
153
134
"""Return the current umask"""
154
135
# Assume that people aren't messing with the umask while running
624
def sha_strings(strings):
605
def sha_strings(strings, _factory=sha):
625
606
"""Return the sha-1 of concatenation of strings"""
626
# Do some hackery here to install an optimised version of this function on
627
# the first invocation of this function. (We don't define it like this
628
# initially so that we can avoid loading the sha module, which takes up to
629
# 2ms, unless we need to.)
631
def _sha_strings(strings, _factory=sha.new):
632
"""Return the sha-1 of concatenation of strings"""
634
map(s.update, strings)
636
sha_strings = _sha_strings
637
# Now that we've installed the real version, call it.
638
return sha_strings(strings)
643
def sha_string(f, _factory=sha.new):
644
return _factory(f).hexdigest()
608
map(s.update, strings)
612
def sha_string(f, _factory=sha):
613
return _factory(f).hexdigest()
649
616
def fingerprint_file(f):
651
618
return {'size': len(b),
652
'sha1': sha.new(b).hexdigest()}
619
'sha1': sha(b).hexdigest()}
655
622
def compare_files(a, b):
1242
1209
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1245
_real_walkdirs_utf8 = None
1212
class DirReader(object):
1213
"""An interface for reading directories."""
1215
def top_prefix_to_starting_dir(self, top, prefix=""):
1216
"""Converts top and prefix to a starting dir entry
1218
:param top: A utf8 path
1219
:param prefix: An optional utf8 path to prefix output relative paths
1221
:return: A tuple starting with prefix, and ending with the native
1224
raise NotImplementedError(self.top_prefix_to_starting_dir)
1226
def read_dir(self, prefix, top):
1227
"""Read a specific dir.
1229
:param prefix: A utf8 prefix to be prepended to the path basenames.
1230
:param top: A natively encoded path to read.
1231
:return: A list of the directory's contents. Each item contains:
1232
(utf8_relpath, utf8_name, kind, lstatvalue, native_abspath)
1234
raise NotImplementedError(self.read_dir)
1237
_selected_dir_reader = None
1247
1240
def _walkdirs_utf8(top, prefix=""):
1248
1241
"""Yield data about all the directories in a tree.
1268
1261
# but that gets a bit tricky, and requires custom compiling
1269
1262
# for win98 anyway.
1271
from bzrlib._walkdirs_win32 import _walkdirs_utf8_win32_find_file
1264
from bzrlib._walkdirs_win32 import Win32ReadDir
1272
1265
except ImportError:
1273
_real_walkdirs_utf8 = _walkdirs_unicode_to_utf8
1266
_selected_dir_reader = UnicodeDirReader()
1275
_real_walkdirs_utf8 = _walkdirs_utf8_win32_find_file
1268
_selected_dir_reader = Win32ReadDir()
1276
1269
elif fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
1277
1270
# ANSI_X3.4-1968 is a form of ASCII
1278
_real_walkdirs_utf8 = _walkdirs_unicode_to_utf8
1271
_selected_dir_reader = UnicodeDirReader()
1280
_real_walkdirs_utf8 = _walkdirs_fs_utf8
1281
return _real_walkdirs_utf8(top, prefix=prefix)
1284
def _walkdirs_fs_utf8(top, prefix=""):
1285
"""See _walkdirs_utf8.
1287
This sub-function is called when we know the filesystem is already in utf8
1288
encoding. So we don't need to transcode filenames.
1291
_directory = _directory_kind
1292
# Use C accelerated directory listing.
1293
_listdir = _read_dir
1294
_kind_from_mode = _formats.get
1274
from bzrlib._readdir_pyx import UTF8DirReader
1276
# No optimised code path
1277
_selected_dir_reader = UnicodeDirReader()
1279
_selected_dir_reader = UTF8DirReader()
1296
1280
# 0 - relpath, 1 - basename, 2 - kind, 3 - stat, 4 - toppath
1297
1281
# But we don't actually use 1-3 in pending, so set them to None
1298
pending = [(safe_utf8(prefix), None, None, None, safe_utf8(top))]
1282
pending = [[_selected_dir_reader.top_prefix_to_starting_dir(top, prefix)]]
1283
read_dir = _selected_dir_reader.read_dir
1284
_directory = _directory_kind
1300
relroot, _, _, _, top = pending.pop()
1302
relprefix = relroot + '/'
1305
top_slash = top + '/'
1308
append = dirblock.append
1309
# read_dir supplies in should-stat order.
1310
for _, name in sorted(_listdir(top)):
1311
abspath = top_slash + name
1312
statvalue = _lstat(abspath)
1313
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1314
append((relprefix + name, name, kind, statvalue, abspath))
1286
relroot, _, _, _, top = pending[-1].pop()
1289
dirblock = sorted(read_dir(relroot, top))
1316
1290
yield (relroot, top), dirblock
1318
1291
# push the user specified dirs from dirblock
1319
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1322
def _walkdirs_unicode_to_utf8(top, prefix=""):
1323
"""See _walkdirs_utf8
1325
Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
1327
This is currently the fallback code path when the filesystem encoding is
1328
not UTF-8. It may be better to implement an alternative so that we can
1329
safely handle paths that are not properly decodable in the current
1332
_utf8_encode = codecs.getencoder('utf8')
1334
_directory = _directory_kind
1335
_listdir = os.listdir
1336
_kind_from_mode = _formats.get
1338
pending = [(safe_utf8(prefix), None, None, None, safe_unicode(top))]
1340
relroot, _, _, _, top = pending.pop()
1342
relprefix = relroot + '/'
1292
next = [d for d in reversed(dirblock) if d[2] == _directory]
1294
pending.append(next)
1297
class UnicodeDirReader(DirReader):
1298
"""A dir reader for non-utf8 file systems, which transcodes."""
1300
__slots__ = ['_utf8_encode']
1303
self._utf8_encode = codecs.getencoder('utf8')
1305
def top_prefix_to_starting_dir(self, top, prefix=""):
1306
"""See DirReader.top_prefix_to_starting_dir."""
1307
return (safe_utf8(prefix), None, None, None, safe_unicode(top))
1309
def read_dir(self, prefix, top):
1310
"""Read a single directory from a non-utf8 file system.
1312
top, and the abspath element in the output are unicode, all other paths
1313
are utf8. Local disk IO is done via unicode calls to listdir etc.
1315
This is currently the fallback code path when the filesystem encoding is
1316
not UTF-8. It may be better to implement an alternative so that we can
1317
safely handle paths that are not properly decodable in the current
1320
See DirReader.read_dir for details.
1322
_utf8_encode = self._utf8_encode
1324
_listdir = os.listdir
1325
_kind_from_mode = file_kind_from_stat_mode
1328
relprefix = prefix + '/'
1345
1331
top_slash = top + u'/'
1348
1334
append = dirblock.append
1349
1335
for name in sorted(_listdir(top)):
1350
name_utf8 = _utf8_encode(name)[0]
1337
name_utf8 = _utf8_encode(name)[0]
1338
except UnicodeDecodeError:
1339
raise errors.BadFilenameEncoding(
1340
_utf8_encode(relprefix)[0] + name, _fs_enc)
1351
1341
abspath = top_slash + name
1352
1342
statvalue = _lstat(abspath)
1353
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1343
kind = _kind_from_mode(statvalue.st_mode)
1354
1344
append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
1355
yield (relroot, top), dirblock
1357
# push the user specified dirs from dirblock
1358
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1361
1348
def copy_tree(from_path, to_path, handlers={}):
1436
1423
return _cached_user_encoding
1438
1425
if sys.platform == 'darwin':
1439
# work around egregious python 2.4 bug
1426
# python locale.getpreferredencoding() always returns
1427
# 'mac-roman' on darwin. That's a lie.
1440
1428
sys.platform = 'posix'
1430
if os.environ.get('LANG', None) is None:
1431
# If LANG is not set, we end up with 'ascii', which is bad
1432
# ('mac-roman' is more than ascii), so we set a default which
1433
# will give us UTF-8 (which appears to work in all cases on
1434
# OSX). Users are still free to override LANG of course, as
1435
# long as it gives us something meaningful. This work-around
1436
# *may* not be needed with python 3k and/or OSX 10.5, but will
1437
# work with them too -- vila 20080908
1438
os.environ['LANG'] = 'en_US.UTF-8'
1444
1441
sys.platform = 'darwin'
1573
1570
return open(filename, 'rU').read()
1577
from bzrlib._readdir_pyx import read_dir as _read_dir
1579
from bzrlib._readdir_py import read_dir as _read_dir
1573
def file_kind_from_stat_mode_thunk(mode):
1574
global file_kind_from_stat_mode
1575
if file_kind_from_stat_mode is file_kind_from_stat_mode_thunk:
1577
from bzrlib._readdir_pyx import UTF8DirReader
1578
file_kind_from_stat_mode = UTF8DirReader().kind_from_mode
1580
from bzrlib._readdir_py import (
1581
_kind_from_mode as file_kind_from_stat_mode
1583
return file_kind_from_stat_mode(mode)
1584
file_kind_from_stat_mode = file_kind_from_stat_mode_thunk
1587
def file_kind(f, _lstat=os.lstat):
1589
return file_kind_from_stat_mode(_lstat(f).st_mode)
1591
if getattr(e, 'errno', None) in (errno.ENOENT, errno.ENOTDIR):
1592
raise errors.NoSuchFile(f)