1
# Bazaar-NG -- distributed version control
3
# Copyright (C) 2005 by Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
from shutil import copyfile
20
from stat import (S_ISREG, S_ISDIR, S_ISLNK, ST_MODE, ST_SIZE,
21
S_ISCHR, S_ISBLK, S_ISFIFO, S_ISSOCK)
22
from cStringIO import StringIO
34
from ntpath import (abspath as _nt_abspath,
36
normpath as _nt_normpath,
37
realpath as _nt_realpath,
41
from bzrlib.errors import (BzrError,
42
BzrBadParameterNotUnicode,
48
from bzrlib.trace import mutter
51
def make_readonly(filename):
52
"""Make a filename read-only."""
53
mod = os.stat(filename).st_mode
55
os.chmod(filename, mod)
58
def make_writable(filename):
59
mod = os.stat(filename).st_mode
61
os.chmod(filename, mod)
68
"""Return a quoted filename.
70
This previously used backslash quoting, but that works poorly on
72
# TODO: I'm not really sure this is the best format either.
75
_QUOTE_RE = re.compile(r'([^a-zA-Z0-9.,:/\\_~-])')
77
if _QUOTE_RE.search(f):
84
mode = os.lstat(f)[ST_MODE]
103
def kind_marker(kind):
106
elif kind == 'directory':
108
elif kind == 'symlink':
111
raise BzrError('invalid file kind %r' % kind)
114
if hasattr(os.path, 'lexists'):
115
return os.path.lexists(f)
117
if hasattr(os, 'lstat'):
123
if e.errno == errno.ENOENT:
126
raise BzrError("lstat/stat of (%r): %r" % (f, e))
128
def fancy_rename(old, new, rename_func, unlink_func):
129
"""A fancy rename, when you don't have atomic rename.
131
:param old: The old path, to rename from
132
:param new: The new path, to rename to
133
:param rename_func: The potentially non-atomic rename function
134
:param unlink_func: A way to delete the target file if the full rename succeeds
137
# sftp rename doesn't allow overwriting, so play tricks:
139
base = os.path.basename(new)
140
dirname = os.path.dirname(new)
141
tmp_name = u'tmp.%s.%.9f.%d.%s' % (base, time.time(), os.getpid(), rand_chars(10))
142
tmp_name = pathjoin(dirname, tmp_name)
144
# Rename the file out of the way, but keep track if it didn't exist
145
# We don't want to grab just any exception
146
# something like EACCES should prevent us from continuing
147
# The downside is that the rename_func has to throw an exception
148
# with an errno = ENOENT, or NoSuchFile
151
rename_func(new, tmp_name)
152
except (NoSuchFile,), e:
155
# RBC 20060103 abstraction leakage: the paramiko SFTP clients rename
156
# function raises an IOError with errno == None when a rename fails.
157
# This then gets caught here.
158
if e.errno not in (None, errno.ENOENT, errno.ENOTDIR):
161
if (not hasattr(e, 'errno')
162
or e.errno not in (errno.ENOENT, errno.ENOTDIR)):
169
# This may throw an exception, in which case success will
171
rename_func(old, new)
175
# If the file used to exist, rename it back into place
176
# otherwise just delete it from the tmp location
178
unlink_func(tmp_name)
180
rename_func(tmp_name, new)
183
def urlescape(relpath):
184
"""Escape relpath to be a valid url."""
185
if isinstance(relpath, unicode):
186
relpath = relpath.encode('utf-8')
187
# After quoting and encoding, the path should be perfectly
188
# safe as a plain ASCII string, str() just enforces this
189
return str(urllib.quote(relpath))
192
def urlunescape(url):
193
"""Unescape relpath from url format.
195
This returns a Unicode path from a URL
197
# jam 20060427 URLs are supposed to be ASCII only strings
198
# If they are passed in as unicode, urllib.unquote
199
# will return a UNICODE string, which actually contains
200
# utf-8 bytes. So we have to ensure that they are
201
# plain ASCII strings, or the final .decode will
202
# try to encode the UNICODE => ASCII, and then decode
206
except UnicodeError, e:
207
raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
208
unquoted = urllib.unquote(url)
210
unicode_path = unquoted.decode('utf-8')
211
except UnicodeError, e:
212
raise InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
216
# These are characters that if escaped, should stay that way
217
_no_decode_chars = ';/?:@&=+$,#'
218
_no_decode_ords = [ord(c) for c in _no_decode_chars]
219
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
220
+ ['%02X' % o for o in _no_decode_ords])
221
_hex_display_map = urllib._hextochr.copy()
222
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
223
#These entries get mapped to themselves
226
def urlfordisplay(url):
227
"""Decode what you can for a URL, so that we get a nice looking path.
229
This will turn file:// urls into local paths, and try to decode
230
any portions of a http:// style url that it can.
232
if url.startswith('file://'):
233
return local_path_from_url(url)
235
# Split into sections to try to decode utf-8
237
for i in xrange(1, len(res)):
238
escaped_chunks = res[i].split('%')
239
for j in xrange(1, len(escaped_chunks)):
240
item = escaped_chunks[j]
242
escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
244
# Put back the percent symbol
245
escaped_chunks[j] = '%' + item
246
except UnicodeDecodeError:
247
escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
248
unescaped = ''.join(escaped_chunks)
250
res[i] = unescaped.decode('utf-8')
251
except UnicodeDecodeError:
252
# If this path segment cannot be properly utf-8 decoded
253
# after doing unescaping we will just leave it alone
257
def _posix_local_path_to_url(path):
    """Build a file:// url for a local posix path.

    A path such as ./foo is passed through _posix_abspath and posixpath
    normpath, giving a url like file:///path/to/foo.  Escaping of
    unicode and other special characters is delegated to urlescape.
    """
    # normpath is taken straight from posixpath (rather than os.path)
    # so that this function can be tested on non-posix platforms too.
    from posixpath import normpath
    absolute = _posix_abspath(path)
    escaped = urlescape(normpath(absolute))
    return 'file://' + escaped
268
def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into /path/to/foo.

    :raises InvalidURL: if url does not begin with file:///
    """
    if url.startswith('file:///'):
        # Only two of the three slashes are stripped; the remaining one
        # stays as the first character of the result.
        scheme_len = len('file://')
        return urlunescape(url[scheme_len:])
    raise InvalidURL(url, 'local urls must start with file:///')
276
def _win32_local_path_to_url(path):
    """Build a file:// url for a local win32 path.

    A path such as ./foo becomes a url like file:///C|/path/to/foo: the
    first character of the absolute path is followed by '|', and the
    text from index 2 onwards is escaped with urlescape.
    """
    # The ntpath helpers are used directly (not os.path) so that this
    # function can be tested on non-win32 platforms.
    absolute = _win32_abspath(path)
    normalized = _nt_normpath(absolute).replace('\\', '/')
    drive_letter = normalized[0]
    # Index 1 is skipped -- presumably the ':' that follows the drive
    # letter in an absolute win32 path.
    remainder = normalized[2:]
    return 'file:///' + drive_letter + '|' + urlescape(remainder)
288
def _win32_local_path_from_url(url):
    """Convert a url like file:///C|/path/to/foo into C:/path/to/foo.

    :param url: A file:/// url whose path part starts with a drive
        letter, followed by '|' or ':', followed by '/'.
    :return: The drive letter, a ':' and the result of urlunescape on
        the rest of the url.
    :raises InvalidURL: if url does not start with file:///, or if what
        follows is not a well-formed drive specification.  This includes
        urls that are too short to contain one.
    """
    if not url.startswith('file:///'):
        raise InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # The length is checked before indexing so that a truncated url such
    # as 'file:///' or 'file:///C' is reported as InvalidURL instead of
    # escaping as an IndexError.
    if (len(win32_url) < 3
        or win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        or win32_url[1] not in '|:'
        or win32_url[2] != '/'):
        raise InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
    # TODO: jam 20060426, we could .upper() or .lower() the drive letter
    #       for better consistency.
    return win32_url[0] + u':' + urlunescape(win32_url[2:])
303
# In Python 2.4.2 and older, os.path.abspath and os.path.realpath
304
# choke on a Unicode string containing a relative path if
305
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
307
_fs_enc = sys.getfilesystemencoding()
308
def _posix_abspath(path):
    """Return os.path.abspath of path, round-tripped through _fs_enc.

    path is encoded with the filesystem encoding before the call and
    the result is decoded with the same encoding afterwards.
    """
    encoded = path.encode(_fs_enc)
    absolute = os.path.abspath(encoded)
    return absolute.decode(_fs_enc)
310
# jam 20060426 This is another possibility which mimics
311
# os.path.abspath, only uses unicode characters instead
312
# if not os.path.isabs(path):
313
# return os.path.join(os.getcwdu(), path)
317
def _posix_realpath(path):
    """Return os.path.realpath of path, round-tripped through _fs_enc.

    path is encoded with the filesystem encoding before the call and
    the result is decoded with the same encoding afterwards.
    """
    encoded = path.encode(_fs_enc)
    resolved = os.path.realpath(encoded)
    return resolved.decode(_fs_enc)
321
def _win32_abspath(path):
    """Return the ntpath abspath of path, using '/' as the separator.

    path is encoded with the filesystem encoding for the ntpath call,
    the result is decoded again, and every backslash is then replaced
    by a forward slash.
    """
    encoded = path.encode(_fs_enc)
    absolute = _nt_abspath(encoded).decode(_fs_enc)
    return absolute.replace('\\', '/')
325
def _win32_realpath(path):
    """Return the ntpath realpath of path, using '/' as the separator.

    path is encoded with the filesystem encoding for the ntpath call,
    the result is decoded again, and every backslash is then replaced
    by a forward slash.
    """
    encoded = path.encode(_fs_enc)
    resolved = _nt_realpath(encoded).decode(_fs_enc)
    return resolved.replace('\\', '/')
329
def _win32_pathjoin(*args):
    """Join path components with ntpath rules, returning '/' separators."""
    joined = _nt_join(*args)
    return joined.replace('\\', '/')
333
def _win32_normpath(path):
    """Normalize path with ntpath rules, returning '/' separators."""
    normalized = _nt_normpath(path)
    return normalized.replace('\\', '/')
338
return os.getcwdu().replace('\\', '/')
341
def _win32_mkdtemp(*args, **kwargs):
    """Call tempfile.mkdtemp and return its result with '/' separators.

    All positional and keyword arguments are forwarded unchanged.
    """
    created = tempfile.mkdtemp(*args, **kwargs)
    return created.replace('\\', '/')
345
def _win32_rename(old, new):
    """Rename old to new through fancy_rename.

    os.rename is supplied as the rename function and os.unlink as the
    unlink function.
    """
    fancy_rename(old, new, os.rename, os.unlink)
349
# Default is to just use the python builtins
350
abspath = _posix_abspath
351
realpath = _posix_realpath
352
pathjoin = os.path.join
353
normpath = os.path.normpath
355
mkdtemp = tempfile.mkdtemp
357
dirname = os.path.dirname
358
basename = os.path.basename
359
local_path_to_url = _posix_local_path_to_url
360
local_path_from_url = _posix_local_path_from_url
362
MIN_ABS_PATHLENGTH = 1
363
MIN_ABS_URLPATHLENGTH = len('file:///')
366
if sys.platform == 'win32':
367
abspath = _win32_abspath
368
realpath = _win32_realpath
369
pathjoin = _win32_pathjoin
370
normpath = _win32_normpath
371
getcwd = _win32_getcwd
372
mkdtemp = _win32_mkdtemp
373
rename = _win32_rename
375
local_path_to_url = _win32_local_path_to_url
376
local_path_from_url = _win32_local_path_from_url
378
MIN_ABS_PATHLENGTH = 3
379
MIN_ABS_URLPATHLENGTH = len('file:///C|/')
382
def normalizepath(f):
383
if hasattr(os.path, 'realpath'):
387
[p,e] = os.path.split(f)
388
if e == "" or e == "." or e == "..":
391
return pathjoin(F(p), e)
395
"""Copy a file to a backup.
397
Backups are named in GNU-style, with a ~ suffix.
399
If the file is already a backup, it's not copied.
405
if has_symlinks() and os.path.islink(fn):
406
target = os.readlink(fn)
407
os.symlink(target, bfn)
415
outf = file(bfn, 'wb')
423
"""True if f is an accessible directory."""
425
return S_ISDIR(os.lstat(f)[ST_MODE])
431
"""True if f is a regular file."""
433
return S_ISREG(os.lstat(f)[ST_MODE])
438
"""True if f is a symlink."""
440
return S_ISLNK(os.lstat(f)[ST_MODE])
444
def is_inside(dir, fname):
445
"""True if fname is inside dir.
447
The parameters should typically be passed to osutils.normpath first, so
448
that . and .. and repeated slashes are eliminated, and the separators
449
are canonical for the platform.
451
The empty string as a dir name is taken as top-of-tree and matches
454
>>> is_inside('src', pathjoin('src', 'foo.c'))
456
>>> is_inside('src', 'srccontrol')
458
>>> is_inside('src', pathjoin('src', 'a', 'a', 'a', 'foo.c'))
460
>>> is_inside('foo.c', 'foo.c')
462
>>> is_inside('foo.c', '')
464
>>> is_inside('', 'foo.c')
467
# XXX: Most callers of this can actually do something smarter by
468
# looking at the inventory
478
return fname.startswith(dir)
481
def is_inside_any(dir_list, fname):
482
"""True if fname is inside any of given dirs."""
483
for dirname in dir_list:
484
if is_inside(dirname, fname):
490
def pumpfile(fromfile, tofile):
491
"""Copy contents of one file to another."""
494
b = fromfile.read(BUFSIZE)
500
def file_iterator(input_file, readsize=32768):
502
b = input_file.read(readsize)
509
if hasattr(f, 'tell'):
522
def sha_strings(strings):
523
"""Return the sha-1 of concatenation of strings"""
525
map(s.update, strings)
535
def fingerprint_file(f):
540
return {'size': size,
541
'sha1': s.hexdigest()}
544
def compare_files(a, b):
545
"""Returns true if equal in contents"""
556
def local_time_offset(t=None):
557
"""Return offset of local zone from GMT, either at present or at time t."""
558
# python2.3 localtime() can't take None
562
if time.localtime(t).tm_isdst and time.daylight:
565
return -time.timezone
568
def format_date(t, offset=0, timezone='original', date_fmt=None,
570
## TODO: Perhaps a global option to use either universal or local time?
571
## Or perhaps just let people set $TZ?
572
assert isinstance(t, float)
574
if timezone == 'utc':
577
elif timezone == 'original':
580
tt = time.gmtime(t + offset)
581
elif timezone == 'local':
582
tt = time.localtime(t)
583
offset = local_time_offset(t)
585
raise BzrError("unsupported timezone format %r" % timezone,
586
['options are "utc", "original", "local"'])
588
date_fmt = "%a %Y-%m-%d %H:%M:%S"
590
offset_str = ' %+03d%02d' % (offset / 3600, (offset / 60) % 60)
593
return (time.strftime(date_fmt, tt) + offset_str)
596
def compact_date(when):
    """Format the timestamp when as a UTC YYYYmmddHHMMSS string."""
    utc_fields = time.gmtime(when)
    return time.strftime('%Y%m%d%H%M%S', utc_fields)
602
"""Return size of given open file."""
603
return os.fstat(f.fileno())[ST_SIZE]
606
# Define rand_bytes based on platform.
608
# Python 2.4 and later have os.urandom,
609
# but it doesn't work on some arches
611
rand_bytes = os.urandom
612
except (NotImplementedError, AttributeError):
613
# If python doesn't have os.urandom, or it doesn't work,
614
# then try to first pull random data from /dev/urandom
615
if os.path.exists("/dev/urandom"):
616
rand_bytes = file('/dev/urandom', 'rb').read
617
# Otherwise, use this hack as a last resort
619
# not well seeded, but better than nothing
624
s += chr(random.randint(0, 255))
629
ALNUM = '0123456789abcdefghijklmnopqrstuvwxyz'
631
"""Return a random string of num alphanumeric characters
633
The result only contains lowercase chars because it may be used on
634
case-insensitive filesystems.
637
for raw_byte in rand_bytes(num):
638
s += ALNUM[ord(raw_byte) % 36]
642
## TODO: We could later have path objects that remember their list
643
## decomposition (might be too tricksy though.)
646
"""Turn string into list of parts.
652
>>> splitpath('a/./b')
654
>>> splitpath('a/.b')
656
>>> splitpath('a/../b')
657
Traceback (most recent call last):
659
BzrError: sorry, '..' not allowed in path
661
assert isinstance(p, types.StringTypes)
663
# split on either delimiter because people might use either on
665
ps = re.split(r'[\\/]', p)
670
raise BzrError("sorry, %r not allowed in path" % f)
671
elif (f == '.') or (f == ''):
678
assert isinstance(p, list)
680
if (f == '..') or (f == None) or (f == ''):
681
raise BzrError("sorry, %r not allowed in path" % f)
685
def appendpath(p1, p2):
689
return pathjoin(p1, p2)
693
"""Split s into lines, but without removing the newline characters."""
694
lines = s.split('\n')
695
result = [line + '\n' for line in lines[:-1]]
697
result.append(lines[-1])
701
def hardlinks_good():
    """Return False on win32, cygwin and darwin, True everywhere else."""
    excluded_platforms = ('win32', 'cygwin', 'darwin')
    if sys.platform in excluded_platforms:
        return False
    return True
705
def link_or_copy(src, dest):
706
"""Hardlink a file, or copy it if it can't be hardlinked."""
707
if not hardlinks_good():
712
except (OSError, IOError), e:
713
if e.errno != errno.EXDEV:
717
def delete_any(full_path):
718
"""Delete a file or directory."""
722
# We may be renaming a dangling inventory id
723
if e.errno not in (errno.EISDIR, errno.EACCES, errno.EPERM):
729
if hasattr(os, 'symlink'):
735
def contains_whitespace(s):
736
"""True if there are any whitespace characters in s."""
737
for ch in string.whitespace:
744
def contains_linebreaks(s):
745
"""True if there is any vertical whitespace in s."""
753
def relpath(base, path):
754
"""Return path relative to base, or raise exception.
756
The path may be either an absolute path or a path relative to the
757
current working directory.
759
os.path.commonprefix (python2.4) has a bad bug that it works just
760
on string prefixes, assuming that '/u' is a prefix of '/u2'. This
764
assert len(base) >= MIN_ABS_PATHLENGTH, ('Length of base must be equal or'
765
' exceed the platform minimum length (which is %d)' %
772
while len(head) >= len(base):
775
head, tail = os.path.split(head)
779
raise PathNotChild(rp, base)
787
def urlrelpath(base, path):
    """Return the escaped part of the url path that is relative to base.

    Both arguments are assumed to be fully specified file:// URLs.
    Each is converted with local_path_from_url, the two local paths
    are handed to relpath, and the result is escaped with urlescape.
    """
    too_short_msg = ('Length of base must be equal or'
        ' exceed the platform minimum url length (which is %d)' %
        MIN_ABS_URLPATHLENGTH)
    assert len(base) >= MIN_ABS_URLPATHLENGTH, too_short_msg

    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return urlescape(relpath(local_base, local_path))
801
def safe_unicode(unicode_or_utf8_string):
802
"""Coerce unicode_or_utf8_string into unicode.
804
If it is unicode, it is returned.
805
Otherwise it is decoded from utf-8. If a decoding error
806
occurs, it is wrapped as a If the decoding fails, the exception is wrapped
807
as a BzrBadParameter exception.
809
if isinstance(unicode_or_utf8_string, unicode):
810
return unicode_or_utf8_string
812
return unicode_or_utf8_string.decode('utf8')
813
except UnicodeDecodeError:
814
raise BzrBadParameterNotUnicode(unicode_or_utf8_string)
817
# True only on darwin, False on every other platform.
_platform_normalizes_filenames = (sys.platform == 'darwin')


def normalizes_filenames():
    """Report whether this platform normalizes unicode filenames.

    Mac OSX does, Windows/Linux do not.
    """
    return _platform_normalizes_filenames
830
if not _platform_normalizes_filenames:
    def unicode_filename(path):
        """Make sure 'path' is a properly normalized filename.

        On platforms where the system does not normalize filenames
        (Windows, Linux), a file has to be accessed by its exact path.
        Internally, bzr only supports NFC/NFKC normalization, since
        that is the standard for XML documents.
        So the original path is returned unchanged, together with a
        flag saying whether it is already in NFKC form.

        :return: (path, is_normalized) Return a path which can
            access the file, and whether or not this path is
            normalized.
        """
        is_normalized = (unicodedata.normalize('NFKC', path) == path)
        return path, is_normalized
else:
    def unicode_filename(path):
        """Make sure 'path' is a properly normalized filename.

        On platforms where the system normalizes filenames (Mac OSX),
        a file can be accessed by any path which normalizes to its
        name.
        Internally, bzr only supports NFC/NFKC normalization, since
        that is the standard for XML documents.
        So the NFKC-normalized path is returned, and it is always
        reported as normalized.

        :return: (path, is_normalized) Return a path which can
            access the file, and whether or not this path is
            normalized.
        """
        normalized = unicodedata.normalize('NFKC', path)
        return normalized, True
865
def terminal_width():
866
"""Return estimated terminal width."""
868
# TODO: Do something smart on Windows?
870
# TODO: Is there anything that gets a better update when the window
871
# is resized while the program is running? We could use the Python termcap
874
return int(os.environ['COLUMNS'])
875
except (IndexError, KeyError, ValueError):
878
def supports_executable():
    """Return True unless sys.platform is win32."""
    if sys.platform == "win32":
        return False
    return True
882
def strip_url_trailing_slash(path):
883
"""Strip trailing slash, except for root paths.
884
The definition of 'root path' is platform-dependent.
886
assert path.startswith('file:///'), \
887
'strip_url_trailing_slash expects file:// urls (%s)' % path
888
if len(path) != MIN_ABS_URLPATHLENGTH and path[-1] == '/':
894
_validWin32PathRE = re.compile(r'^([A-Za-z]:[/\\])?[^:<>*"?\|]*$')
897
def check_legal_path(path):
898
"""Check whether the supplied path is legal.
899
This is only required on Windows, so we don't test on other platforms
902
if sys.platform != "win32":
904
if _validWin32PathRE.match(path) is None:
905
raise IllegalPath(path)