bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 2116.4.1
by John Arbash Meinel Update file and revision id generators. | 1 | # Copyright (C) 2006 Canonical Ltd
 | 
| 2 | #
 | |
| 3 | # This program is free software; you can redistribute it and/or modify
 | |
| 4 | # it under the terms of the GNU General Public License as published by
 | |
| 5 | # the Free Software Foundation; either version 2 of the License, or
 | |
| 6 | # (at your option) any later version.
 | |
| 7 | #
 | |
| 8 | # This program is distributed in the hope that it will be useful,
 | |
| 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| 11 | # GNU General Public License for more details.
 | |
| 12 | #
 | |
| 13 | # You should have received a copy of the GNU General Public License
 | |
| 14 | # along with this program; if not, write to the Free Software
 | |
| 15 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| 16 | ||
| 17 | """Common code for generating file or revision ids."""
 | |
| 18 | ||
| 19 | from bzrlib.lazy_import import lazy_import | |
| 20 | lazy_import(globals(), """ | |
| 21 | import time
 | |
| 22 | import unicodedata
 | |
| 23 | ||
| 24 | from bzrlib import (
 | |
| 25 |     config,
 | |
| 26 |     errors,
 | |
| 27 |     osutils,
 | |
| 28 |     )
 | |
| 29 | """) | |
| 30 | ||
| 31 | from bzrlib import ( | |
| 32 | lazy_regex, | |
| 33 |     )
 | |
| 34 | ||
# Ids are sanitised by deleting disallowed characters outright; nothing is
# escaped.  Each pattern matches the characters to be removed:
#   - file ids keep only regex word characters and '.'
#   - revision ids additionally keep '-', '+' and '@'
_file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]')
_rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]')

# Process-wide state used by _next_id_suffix(): the suffix base (built on
# first use, hence None here) and the serial number appended to it.
_gen_file_id_suffix, _gen_file_id_serial = None, 0
| 41 | ||
| 42 | ||
def _next_id_suffix():
    """Return a fresh, reasonably unique suffix for a file id.

    The first call in a process builds a base string of the form
    ``-<compact date>-<16 random chars>-`` from the current time and
    osutils.rand_chars(16), and caches it in a module global.  Every call
    (the first one included) then increments a module-level serial number
    and returns the cached base with that serial appended.
    """
    # XXX TODO: change bzrlib.add.smart_add_tree to call workingtree.add()
    #           rather than having to move the id randomness out of the inner
    #           loop like this.
    # XXX TODO: for the global randomness this uses we should add the
    #           thread-id before the serial #.
    # XXX TODO: jam 20061102 I think it would be good to reset every 100 or
    #           1000 calls, or perhaps if time.time() increases by a certain
    #           amount. time.time() shouldn't be terribly expensive to call,
    #           and it means that long-lived processes wouldn't use the same
    #           suffix forever.
    global _gen_file_id_suffix, _gen_file_id_serial
    if _gen_file_id_suffix is None:
        # Built once per process; later calls only vary the serial.
        date_part = osutils.compact_date(time.time())
        random_part = osutils.rand_chars(16)
        _gen_file_id_suffix = "-%s-%s-" % (date_part, random_part)
    _gen_file_id_serial = _gen_file_id_serial + 1
    return "%s%d" % (_gen_file_id_suffix, _gen_file_id_serial)
| 65 | ||
| 66 | ||
def gen_file_id(name):
    """Return a new file id for the basename 'name'.

    The id is a cleaned-up, shortened form of the name followed by the
    value of _next_id_suffix(), which is what supplies the uniqueness.

    :param name: The basename the id should be derived from.
    :return: The new file id.
    """
    # Only the suffix needs to be unique; the leading part is there to make
    # the id recognisable.  It is derived from the name as follows:
    # 1) Lowercase it, so the id doesn't have to be escaped (case insensitive
    #    filesystems would bork for ids that only differ in case).
    # 2) Delete every character that is neither a regex word character nor
    #    '.', to keep the ids portable, and coerce the result with str().
    # 3) Strip leading '.' characters so the id is not considered hidden.
    # 4) Keep at most 20 characters. Long filenames also bork on some
    #    filesystems.
    lowered = name.lower()
    cleaned = str(_file_id_chars_re.sub('', lowered))
    visible = cleaned.lstrip('.')
    prefix = visible[:20]
    return prefix + _next_id_suffix()
| 86 | ||
| 87 | ||
def gen_root_id():
    """Return a new file id for a tree root.

    This is gen_file_id() applied to the fixed basename 'tree_root'.
    """
    root_basename = 'tree_root'
    return gen_file_id(root_basename)
| 91 | ||
| 92 | ||
def gen_revision_id(username, timestamp=None):
    """Return a new revision id.

    The id has the form ``<user>-<compact date>-<16 random chars>`` and is
    returned utf8-encoded.

    :param username: This is the value returned by config.username(), which
        is typically a real name, followed by an email address. If an email
        address is found, just that portion is used. Otherwise the whole
        value is flattened and used instead.
    :param timestamp: Time to embed in the id, passed to
        osutils.compact_date(). When None, time.time() is used.
    :return: A new revision id.
    """
    try:
        identity = config.extract_email_address(username)
    except errors.NoEmailInUsername:
        # No email address available; fall back to the whole username.
        identity = username

    # Flatten: lowercase, spaces become underscores, and anything outside
    # the allowed revision-id character set is dropped.
    identity = _rev_id_chars_re.sub('', identity.lower().replace(' ', '_'))

    # This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy
    random_part = osutils.rand_chars(16)

    if timestamp is None:
        timestamp = time.time()
    date_part = osutils.compact_date(timestamp)

    pieces = [identity, date_part, random_part]
    return u'-'.join(pieces).encode('utf8')