197
260
# choke on a Unicode string containing a relative path if
198
261
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
200
_fs_enc = sys.getfilesystemencoding()
263
_fs_enc = sys.getfilesystemencoding() or 'utf-8'
201
264
def _posix_abspath(path):
202
return os.path.abspath(path.encode(_fs_enc)).decode(_fs_enc)
203
# jam 20060426 This is another possibility which mimics
204
# os.path.abspath, only uses unicode characters instead
205
# if not os.path.isabs(path):
206
# return os.path.join(os.getcwdu(), path)
265
# jam 20060426 rather than encoding to fsencoding
266
# copy posixpath.abspath, but use os.getcwdu instead
267
if not posixpath.isabs(path):
268
path = posixpath.join(getcwd(), path)
269
return posixpath.normpath(path)
210
272
def _posix_realpath(path):
    """Return the canonical form of path with symlinks resolved.

    The path is encoded to the filesystem encoding before being handed to
    posixpath.realpath(), and the result is decoded again.

    :param path: The path to resolve.
    :return: The resolved path, decoded from the filesystem encoding.
    """
    return posixpath.realpath(path.encode(_fs_enc)).decode(_fs_enc)
276
def _win32_fixdrive(path):
    """Force drive letters to be consistent.

    win32 is inconsistent whether it returns lower or upper case
    and even if it was consistent the user might type the other
    so we force it to uppercase
    running python.exe under cmd.exe return capital C:\\
    running win32 python inside a cygwin shell returns lowercase c:\\

    :param path: A path which may start with a drive specification.
    :return: The same path with its drive part upper-cased.
    """
    drive, path = _nt_splitdrive(path)
    return drive.upper() + path
214
289
def _win32_abspath(path):
    """Return the absolute version of path, using '/' as the separator.

    The result is passed through _win32_fixdrive() so that the drive
    letter has a consistent case.

    :param path: The path to make absolute; it is converted to unicode.
    :return: The absolute path with backslashes replaced by '/'.
    """
    # Real _nt_abspath doesn't have a problem with a unicode cwd
    return _win32_fixdrive(_nt_abspath(unicode(path)).replace('\\', '/'))
294
def _win98_abspath(path):
    """Return the absolute version of a path.

    Windows 98 safe implementation (python reimplementation
    of Win32 API function GetFullPathNameW)

    :param path: The path to make absolute.
    :return: The absolute path, '/'-separated, with the drive letter
        normalized by _win32_fixdrive().
    """
    # Corner cases:
    #   \\HOST\path => //HOST/path
    #   //HOST/path => //HOST/path
    #   path        => C:/cwd/path
    # check for absolute path
    drive = _nt_splitdrive(path)[0]
    if drive == '' and path[:2] not in ('//', '\\\\'):
        # No drive and not a UNC path, so resolve against the current
        # working directory.
        cwd = os.getcwdu()
        # we cannot simply os.path.join cwd and path
        # because os.path.join('C:','/path') produce '/path'
        # and this is incorrect
        if path[:1] in ('/', '\\'):
            # Rooted on the current drive: keep only the drive part of
            # cwd and drop the leading separator, which is re-added below.
            cwd = _nt_splitdrive(cwd)[0]
            path = path[1:]
        path = cwd + '\\' + path
    return _win32_fixdrive(_nt_normpath(path).replace('\\', '/'))
320
if win32utils.winver == 'Windows 98':
    # Use the Windows 98 safe reimplementation of abspath on that platform.
    _win32_abspath = _win98_abspath
218
324
def _win32_realpath(path):
    """Return the real path for path, using '/' as the separator.

    The result is passed through _win32_fixdrive() so that the drive
    letter has a consistent case.

    :param path: The path to resolve; it is converted to unicode.
    :return: The resolved path with backslashes replaced by '/'.
    """
    # Real _nt_realpath doesn't have a problem with a unicode cwd
    return _win32_fixdrive(_nt_realpath(unicode(path)).replace('\\', '/'))
222
329
def _win32_pathjoin(*args):
728
1016
return _platform_normalizes_filenames
1019
def _accessible_normalized_filename(path):
1020
"""Get the unicode normalized path, and if you can access the file.
1022
On platforms where the system normalizes filenames (Mac OSX),
1023
you can access a file by any path which will normalize correctly.
1024
On platforms where the system does not normalize filenames
1025
(Windows, Linux), you have to access a file by its exact path.
1027
Internally, bzr only supports NFC normalization, since that is
1028
the standard for XML documents.
1030
So return the normalized path, and a flag indicating if the file
1031
can be accessed by that path.
1034
return unicodedata.normalize('NFC', unicode(path)), True
1037
def _inaccessible_normalized_filename(path):
1038
__doc__ = _accessible_normalized_filename.__doc__
1040
normalized = unicodedata.normalize('NFC', unicode(path))
1041
return normalized, normalized == path
731
1044
# Pick the filename-normalization helpers that match the behaviour of the
# current platform's filesystem.
if _platform_normalizes_filenames:
    def unicode_filename(path):
        """Make sure 'path' is a properly normalized filename.

        On platforms where the system normalizes filenames (Mac OSX),
        you can access a file by any path which will normalize
        correctly.
        Internally, bzr only supports NFC/NFKC normalization, since
        that is the standard for XML documents.
        So we return a normalized path, and indicate this has been
        properly normalized.

        :return: (path, is_normalized) Return a path which can
                access the file, and whether or not this path is
                normalized.
        """
        return unicodedata.normalize('NFKC', path), True

    normalized_filename = _accessible_normalized_filename
else:
    def unicode_filename(path):
        """Make sure 'path' is a properly normalized filename.

        On platforms where the system does not normalize filenames
        (Windows, Linux), you have to access a file by its exact path.
        Internally, bzr only supports NFC/NFKC normalization, since
        that is the standard for XML documents.
        So we return the original path, and indicate if this is
        properly normalized.

        :return: (path, is_normalized) Return a path which can
                access the file, and whether or not this path is
                normalized.
        """
        return path, unicodedata.normalize('NFKC', path) == path

    normalized_filename = _inaccessible_normalized_filename
766
1050
def terminal_width():
767
1051
"""Return estimated terminal width."""
768
1052
if sys.platform == 'win32':
769
import bzrlib.win32console
770
return bzrlib.win32console.get_console_size()[0]
1053
return win32utils.get_console_size()[0]
773
1056
import struct, fcntl, termios
812
1156
to exclude some directories, they are then not descended into.
814
1158
The data yielded is of the form:
815
[(relpath, basename, kind, lstat, path_from_top), ...]
1159
((directory-relpath, directory-path-from-top),
1160
[(relpath, basename, kind, lstat, path-from-top), ...]),
1161
- directory-relpath is the relative path of the directory being returned
1162
with respect to top. prefix is prepended to this.
1163
- directory-path-from-root is the path including top for this directory.
1164
It is suitable for use with os functions.
1165
- relpath is the relative path within the subtree being walked.
1166
- basename is the basename of the path
1167
- kind is the kind of the file now. If unknown then the file is not
1168
present within the tree - but it may be recorded as versioned. See
1170
- lstat is the stat data *if* the file was statted.
1171
- planned, not implemented:
1172
path_from_tree_root is the path from the root of the tree.
1174
:param prefix: Prefix the relpaths that are yielded with 'prefix'. This
1175
allows one to walk a subtree but get paths that are relative to a tree
817
1177
:return: an iterator over the dirs.
821
_directory = _directory_kind
1179
#TODO there is a bit of a smell where the results of the directory-
1180
# summary in this, and the path from the root, may not agree
1181
# depending on top and prefix - i.e. ./foo and foo as a pair leads to
1182
# potentially confusing output. We should make this more robust - but
1183
# not at a speed cost. RBC 20060731
1185
_directory = _directory_kind
1186
_listdir = os.listdir
1187
_kind_from_mode = _formats.get
1188
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
1190
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1191
relroot, _, _, _, top = pending.pop()
1193
relprefix = relroot + u'/'
1196
top_slash = top + u'/'
1199
append = dirblock.append
1201
names = sorted(_listdir(top))
1203
if not _is_error_enotdir(e):
1207
abspath = top_slash + name
1208
statvalue = _lstat(abspath)
1209
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1210
append((relprefix + name, name, kind, statvalue, abspath))
1211
yield (relroot, top), dirblock
1213
# push the user specified dirs from dirblock
1214
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1217
# Implementation chosen by the first call to _walkdirs_utf8(); None until then.
_real_walkdirs_utf8 = None

def _walkdirs_utf8(top, prefix=""):
    """Yield data about all the directories in a tree.

    This yields the same information as walkdirs() only each entry is yielded
    in utf-8. On platforms which have a filesystem encoding of utf8 the paths
    are returned as exact byte-strings.

    The concrete implementation is selected on the first call and cached
    in the module-level _real_walkdirs_utf8.

    :return: yields a tuple of (dir_info, [file_info])
        dir_info is (utf8_relpath, path-from-top)
        file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
        if top is an absolute path, path-from-top is also an absolute path.
        path-from-top might be unicode or utf8, but it is the correct path to
        pass to os functions to affect the file in question. (such as os.lstat)
    """
    global _real_walkdirs_utf8
    if _real_walkdirs_utf8 is None:
        fs_encoding = _fs_enc.upper()
        if win32utils.winver == 'Windows NT':
            # Win98 doesn't have unicode apis like FindFirstFileW
            # TODO: We possibly could support Win98 by falling back to the
            #       original FindFirstFile, and using TCHAR instead of WCHAR,
            #       but that gets a bit tricky, and requires custom compiling
            #       for win98 anyway.
            try:
                from bzrlib._walkdirs_win32 import _walkdirs_utf8_win32_find_file
            except ImportError:
                # The extension is not available; use the generic
                # unicode-to-utf8 walker instead.
                _real_walkdirs_utf8 = _walkdirs_unicode_to_utf8
            else:
                _real_walkdirs_utf8 = _walkdirs_utf8_win32_find_file
        elif fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
            # ANSI_X3.4-1968 is a form of ASCII
            _real_walkdirs_utf8 = _walkdirs_unicode_to_utf8
        else:
            _real_walkdirs_utf8 = _walkdirs_fs_utf8
    return _real_walkdirs_utf8(top, prefix=prefix)
1256
def _walkdirs_fs_utf8(top, prefix=""):
1257
"""See _walkdirs_utf8.
1259
This sub-function is called when we know the filesystem is already in utf8
1260
encoding. So we don't need to transcode filenames.
1263
_directory = _directory_kind
1264
# Use C accelerated directory listing.
822
1265
_listdir = read_dir
823
pending = [("", "", _directory, None, top)]
1266
_kind_from_mode = _formats.get
1268
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1269
# But we don't actually uses 1-3 in pending, so set them to None
1270
pending = [(safe_utf8(prefix), None, None, None, safe_utf8(top))]
1272
relroot, _, _, _, top = pending.pop()
1274
relprefix = relroot + '/'
1277
top_slash = top + '/'
826
currentdir = pending.pop()
827
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
830
relroot = currentdir[0] + '/'
1280
append = dirblock.append
833
1281
for name, kind in sorted(_listdir(top)):
834
abspath = top + '/' + name
1282
abspath = top_slash + name
835
1283
if kind == 'unknown':
836
statvalue = lstat(abspath)
837
kind = file_kind_from_stat_mode(statvalue.st_mode)
1284
statvalue = _lstat(abspath)
1285
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
839
1287
statvalue = None
840
dirblock.append ((relroot + name, name, kind, statvalue, abspath))
842
# push the user specified dirs from dirblock
843
for dir in reversed(dirblock):
844
if dir[2] == _directory:
1288
statvalue = _lstat(abspath)
1289
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1290
append((relprefix + name, name, kind, statvalue, abspath))
1291
yield (relroot, top), dirblock
1293
# push the user specified dirs from dirblock
1294
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1297
def _walkdirs_unicode_to_utf8(top, prefix=""):
1298
"""See _walkdirs_utf8
1300
Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
1302
This is currently the fallback code path when the filesystem encoding is
1303
not UTF-8. It may be better to implement an alternative so that we can
1304
safely handle paths that are not properly decodable in the current
1307
_utf8_encode = codecs.getencoder('utf8')
1309
_directory = _directory_kind
1310
_listdir = os.listdir
1311
_kind_from_mode = _formats.get
1313
pending = [(safe_utf8(prefix), None, None, None, safe_unicode(top))]
1315
relroot, _, _, _, top = pending.pop()
1317
relprefix = relroot + '/'
1320
top_slash = top + u'/'
1323
append = dirblock.append
1324
for name in sorted(_listdir(top)):
1325
name_utf8 = _utf8_encode(name)[0]
1326
abspath = top_slash + name
1327
statvalue = _lstat(abspath)
1328
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1329
append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
1330
yield (relroot, top), dirblock
1332
# push the user specified dirs from dirblock
1333
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1336
def copy_tree(from_path, to_path, handlers=None):
    """Copy all of the entries in from_path into to_path.

    :param from_path: The base directory to copy.
    :param to_path: The target directory. If it does not exist, it will
        be created.
    :param handlers: A dictionary of functions, which takes a source and
        destinations for files, directories, etc.
        It is keyed on the file kind, such as 'directory', 'symlink', or 'file'
        'file', 'directory', and 'symlink' should always exist.
        If they are missing, they will be replaced with 'os.mkdir()',
        'os.readlink() + os.symlink()', and 'shutil.copy2()', respectively.
        None (the default) means no overrides.
    """
    # Now, just copy the existing cached tree to the new location
    # We use a cheap trick here.
    # Absolute paths are prefixed with the first parameter
    # relative paths are prefixed with the second.
    # So we can get both the source and target returned
    # without any extra work.

    def copy_dir(source, dest):
        """Create the destination directory."""
        os.mkdir(dest)

    def copy_link(source, dest):
        """Copy the contents of a symlink"""
        link_to = os.readlink(source)
        os.symlink(link_to, dest)

    real_handlers = {'file': shutil.copy2,
                     'symlink': copy_link,
                     'directory': copy_dir,
                    }
    if handlers is not None:
        # Caller-supplied handlers take precedence over the defaults.
        real_handlers.update(handlers)

    if not os.path.exists(to_path):
        real_handlers['directory'](from_path, to_path)

    for dir_info, entries in walkdirs(from_path, prefix=to_path):
        for relpath, name, kind, st, abspath in entries:
            # abspath is the source; relpath is already prefixed with to_path.
            real_handlers[kind](abspath, relpath)
1378
def path_prefix_key(path):
    """Generate a prefix-order path key for path.

    This can be used to sort paths in the same way that walkdirs does.

    :param path: The path to build a key for.
    :return: A (dirname(path), path) tuple.
    """
    return (dirname(path), path)
1386
def compare_paths_prefix_order(path_a, path_b):
    """Compare path_a and path_b to generate the same order walkdirs uses.

    :return: The cmp() of the two paths' path_prefix_key() values.
    """
    return cmp(path_prefix_key(path_a), path_prefix_key(path_b))
1393
_cached_user_encoding = None
1396
def get_user_encoding(use_cache=True):
1397
"""Find out what the preferred user encoding is.
1399
This is generally the encoding that is used for command line parameters
1400
and file contents. This may be different from the terminal encoding
1401
or the filesystem encoding.
1403
:param use_cache: Enable cache for detected encoding.
1404
(This parameter is turned on by default,
1405
and required only for selftesting)
1407
:return: A string defining the preferred user encoding
1409
global _cached_user_encoding
1410
if _cached_user_encoding is not None and use_cache:
1411
return _cached_user_encoding
1413
if sys.platform == 'darwin':
1414
# work around egregious python 2.4 bug
1415
sys.platform = 'posix'
1419
sys.platform = 'darwin'
1424
user_encoding = locale.getpreferredencoding()
1425
except locale.Error, e:
1426
sys.stderr.write('bzr: warning: %s\n'
1427
' Could not determine what text encoding to use.\n'
1428
' This error usually means your Python interpreter\n'
1429
' doesn\'t support the locale set by $LANG (%s)\n'
1430
" Continuing with ascii encoding.\n"
1431
% (e, os.environ.get('LANG')))
1432
user_encoding = 'ascii'
1434
# Windows returns 'cp0' to indicate there is no code page. So we'll just
1435
# treat that as ASCII, and not support printing unicode characters to the
1438
# For python scripts run under vim, we get '', so also treat that as ASCII
1439
if user_encoding in (None, 'cp0', ''):
1440
user_encoding = 'ascii'
1444
codecs.lookup(user_encoding)
1446
sys.stderr.write('bzr: warning:'
1447
' unknown encoding %s.'
1448
' Continuing with ascii encoding.\n'
1451
user_encoding = 'ascii'
1454
_cached_user_encoding = user_encoding
1456
return user_encoding
1459
def recv_all(socket, bytes):
    """Receive an exact number of bytes.

    Regular Socket.recv() may return less than the requested number of bytes,
    depending on what's in the OS buffer.  MSG_WAITALL is not available
    on all platforms, but this should work everywhere.  This will return
    less than the requested amount if the remote end closes.

    This isn't optimized and is intended mostly for use in testing.

    :param socket: An object with a recv(count) method.
    :param bytes: The number of bytes to read.
    :return: The data received; shorter than requested only when recv()
        returned an empty result.
    """
    b = ''
    while len(b) < bytes:
        new = socket.recv(bytes - len(b))
        if not new:
            # An empty result means the remote end closed the connection.
            break
        b += new
    return b
1478
def send_all(socket, bytes):
    """Send all bytes on a socket.

    Regular socket.sendall() can give socket error 10053 on Windows.  This
    implementation sends no more than 64k at a time, which avoids this problem.

    :param socket: An object with a sendall(data) method.
    :param bytes: The data to send.
    """
    chunk_size = 2**16
    for pos in range(0, len(bytes), chunk_size):
        socket.sendall(bytes[pos:pos+chunk_size])
1489
def dereference_path(path):
    """Determine the real path to a file.

    All parent elements are dereferenced.  But the file itself is not
    dereferenced.

    :param path: The original path.  May be absolute or relative.
    :return: the real path *to* the file
    """
    parent, base = os.path.split(path)
    # The pathjoin for '.' is a workaround for Python bug #1213894.
    # (initial path components aren't dereferenced)
    return pathjoin(realpath(pathjoin('.', parent)), base)
1503
def supports_mapi():
    """Tell whether MAPI can be used to launch a mail client.

    :return: True when sys.platform is "win32", False otherwise.
    """
    on_win32 = (sys.platform == "win32")
    return on_win32
1508
def resource_string(package, resource_name):
    """Load a resource from a package and return it as a string.

    Note: Only packages that start with bzrlib are currently supported.

    This is designed to be a lightweight implementation of resource
    loading in a way which is API compatible with the same API from
    pkg_resources. See
    http://peak.telecommunity.com/DevCenter/PkgResources#basic-resource-access.
    If and when pkg_resources becomes a standard library, this routine
    can delegate to pkg_resources.

    :param package: Dotted package name; must be "bzrlib" or start with
        "bzrlib.".
    :param resource_name: Path of the resource relative to the package.
    :return: The contents of the resource file.
    :raises errors.BzrError: if package is not within bzrlib.
    """
    # Check package name is within bzrlib
    if package == "bzrlib":
        resource_relpath = resource_name
    elif package.startswith("bzrlib."):
        package = package[len("bzrlib."):].replace('.', os.sep)
        resource_relpath = pathjoin(package, resource_name)
    else:
        raise errors.BzrError('resource package %s not in bzrlib' % package)

    # Map the resource to a file and read its contents
    base = dirname(bzrlib.__file__)
    if getattr(sys, 'frozen', None):    # bzr.exe
        base = abspath(pathjoin(base, '..', '..'))
    filename = pathjoin(base, resource_relpath)
    # Close the file explicitly instead of relying on garbage collection.
    f = open(filename, 'rU')
    try:
        return f.read()
    finally:
        f.close()