/brz/remove-bazaar : revision 2294.1.9

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/xml5.py

Committer: John Arbash Meinel
Date: 2007-02-17 22:25:37 UTC
mto: This revision was merged to the branch mainline in revision 2298.
Revision ID: john@arbash-meinel.com-20070217222537-p0hxseinc3x84e96

Minor performance improvement, use None as signal rather than ROOT_ID
for xml5 inventories. Discover small bug in serialized text because of this.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/xml5.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import cStringIO

import re

from bzrlib import (

cache_utf8,

inventory,

)

from bzrlib.xml_serializer import SubElement, Element, Serializer

from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry

from bzrlib.revision import Revision

from bzrlib.errors import BzrError

# Compiled regexes used for escaping.  They start out as None and are filled
# in by _ensure_utf8_re() the first time an inventory is written.
_utf8_re = None
_unicode_re = None

# Replacement text for the XML metacharacters matched by the regexes above.
# Each value must be the entity reference, not the raw character, otherwise
# escaping is a no-op.
_xml_escape_map = {
    "&": '&amp;',
    "'": "&apos;", # FIXME: overkill
    "\"": "&quot;",
    "<": "&lt;",
    ">": "&gt;",
    }

def _ensure_utf8_re():
    """Compile the module-level escaping regexes if that has not happened yet.

    _unicode_re matches one XML metacharacter or one character at or above
    U+0080.  _utf8_re matches one XML metacharacter or a run of bytes that
    have the high bit set.  A pattern that is already compiled is left alone.
    """
    global _utf8_re, _unicode_re
    if _unicode_re is None:
        _unicode_re = re.compile(u'[&<>\'\"\u0080-\uffff]')
    if _utf8_re is None:
        _utf8_re = re.compile('[&<>\'\"]|[\x80-\xff]+')

def _unicode_escape_replace(match, _map=_xml_escape_map):
    """Return the XML-safe replacement for one matched Unicode character.

    Used as the substitution callback for _unicode_re.  XML metacharacters
    are looked up in _map; any other matched character is written as a
    decimal character reference.  Non-ascii characters are escaped because
    ElementTree did so, which keeps the output compatible with older
    versions of bzr.

    :param match: a regex match object covering a single character.
    :return: the replacement text for that character.
    """
    matched = match.group()
    # jam 20060816: benchmarks showed try/KeyError to be about 10% faster
    # overall than a None check when misses are rare, and revision ids, file
    # ids and path names are rarely unicode.
    try:
        return _map[matched]
    except KeyError:
        return "&#%d;" % ord(matched)

def _utf8_escape_replace(match, _map=_xml_escape_map):
    """Return the XML-safe replacement for one match of _utf8_re.

    The match is either a single XML metacharacter, which is looked up in
    _map, or a run of high-bit bytes.  A run is decoded from utf8 and each
    resulting character is written as a decimal character reference.

    :param match: a regex match object over an 8-bit string.
    :return: the replacement text for the matched span.
    """
    matched = match.group()
    try:
        return _map[matched]
    except KeyError:
        # Not a metacharacter, so this is a run of utf8-encoded bytes.
        references = ['&#%d;' % ord(uni_chr)
                      for uni_chr in matched.decode('utf8')]
        return ''.join(references)

# Cache of escaped attribute values, keyed by the string that was escaped.
_to_escaped_map = {}


def _encode_and_escape(unicode_or_utf8_str, _map=_to_escaped_map):
    """Return the XML-escaped form of a string plus a closing double quote.

    Results are cached in _map, so repeated ids and names are escaped only
    once.  The module regexes must already be compiled (write_inventory
    calls _ensure_utf8_re() before using this function).

    :param unicode_or_utf8_str: a unicode object, or an 8-bit string that is
        treated as already being utf-8.
    :return: the escaped text with a trailing double-quote character, ready
        to follow an opening such as ' name="'.
    """
    # Unseen strings are common here, so probe with get() and test for None
    # rather than paying for a KeyError on every miss.
    escaped = _map.get(unicode_or_utf8_str)
    if escaped is not None:
        return escaped

    if unicode_or_utf8_str.__class__ == unicode:
        # The alternative policy is a regular UTF8 encoding that escapes only
        # the XML metacharacters.  Performance is equivalent once cache_utf8
        # is used, *but* the serialized texts would be incompatible with old
        # versions of bzr, so there is no net gain.
        body = _unicode_re.sub(_unicode_escape_replace, unicode_or_utf8_str)
        escaped = str(body) + '"'
    else:
        # Plain strings are considered to already be in utf-8, so they use
        # a slightly different escaping callback.
        body = _utf8_re.sub(_utf8_escape_replace, unicode_or_utf8_str)
        escaped = body + '"'

    _map[unicode_or_utf8_str] = escaped
    return escaped

109

110

111

def _get_utf8_or_ascii(a_str,
                       _encode_utf8=cache_utf8.encode,
                       _get_cached_ascii=cache_utf8.get_cached_ascii):
    """Return the cached utf-8 form of a value read from cElementTree.

    cElementTree hands back a plain string when the XML text is pure ascii
    and a Unicode object only when it has to.  This module works in utf-8
    strings, so a Unicode value is encoded and a plain string is returned
    from the ascii cache.

    :param a_str: An 8-bit string or Unicode as returned by
        cElementTree.Element.get()
    :return: A utf-8 encoded 8-bit string.
    """
    # Deliberately specialised for what cElementTree produces.  This is not
    # a generic helper: an arbitrary 8-bit string need not be ascii or valid
    # utf8.
    if a_str.__class__ == unicode:
        return _encode_utf8(a_str)
    return _get_cached_ascii(a_str)

132

133

134

def _clear_cache():
    """Empty the cache that maps raw strings to their escaped form."""
    _to_escaped_map.clear()

137

138

139

class Serializer_v5(Serializer):
    """Version 5 serializer

    Packs inventories and revisions into XML and unpacks them again.
    Inventories are written by hand as a list of strings; revisions are
    built as element trees.
    """

    __slots__ = []

    # This format supports the altered-by hack that reads file ids directly
    # out of the versionedfile, without doing XML parsing.
    support_altered_by_hack = True

    def write_inventory_to_string(self, inv):
        """Serialize an inventory and return the XML as a string.

        :param inv: the inventory to write.
        :return: the text that write_inventory() produces for inv.
        """
        sio = cStringIO.StringIO()
        self.write_inventory(inv, sio)
        return sio.getvalue()

    def write_inventory(self, inv, f):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write.
        """
        _ensure_utf8_re()
        output = []
        append = output.append
        self._append_inventory_root(append, inv)
        entries = inv.iter_entries()
        # Skip the root: it is described by the <inventory> tag itself.
        entries.next()
        for path, ie in entries:
            self._append_entry(append, ie)
        append('</inventory>\n')
        f.writelines(output)
        # Just to keep the cache from growing without bounds
        # but we may actually not want to do clear the cache
        #_clear_cache()

    def _append_inventory_root(self, append, inv):
        """Append the opening <inventory> tag to output.

        The root file_id is written only when it is neither None nor ROOT_ID,
        and revision_id only when it is not None.

        :param append: callable that accepts one string of output.
        :param inv: the inventory being written.
        """
        append('<inventory')
        if inv.root.file_id not in (None, ROOT_ID):
            # _encode_and_escape supplies the closing quote.
            append(' file_id="')
            append(_encode_and_escape(inv.root.file_id))
        append(' format="5"')
        if inv.revision_id is not None:
            append(' revision_id="')
            append(_encode_and_escape(inv.revision_id))
        append('>\n')

    def _append_entry(self, append, ie):
        """Convert InventoryEntry to XML element and append to output.

        :param append: callable that accepts one string of output.
        :param ie: the inventory entry to write.
        """
        # TODO: should just be a plain assertion
        assert InventoryEntry.versionable_kind(ie.kind), \
            'unsupported entry kind %s' % ie.kind

        append("<")
        append(ie.kind)
        if ie.executable:
            append(' executable="yes"')
        append(' file_id="')
        append(_encode_and_escape(ie.file_id))
        append(' name="')
        append(_encode_and_escape(ie.name))
        if self._parent_condition(ie):
            assert isinstance(ie.parent_id, basestring)
            append(' parent_id="')
            append(_encode_and_escape(ie.parent_id))
        if ie.revision is not None:
            append(' revision="')
            append(_encode_and_escape(ie.revision))
        if ie.symlink_target is not None:
            append(' symlink_target="')
            append(_encode_and_escape(ie.symlink_target))
        if ie.text_sha1 is not None:
            # The sha1 is written without escaping, so the closing quote has
            # to be added by hand.
            append(' text_sha1="')
            append(ie.text_sha1)
            append('"')
        if ie.text_size is not None:
            append(' text_size="%d"' % ie.text_size)
        append(" />\n")
        return

    def _parent_condition(self, ie):
        """Return True if the parent_id of ie must be written out.

        A parent of ROOT_ID is left implicit in the serialized form.
        """
        return ie.parent_id != ROOT_ID

    def _pack_revision(self, rev):
        """Revision object -> xml tree

        :param rev: the Revision to serialize.
        :return: the root 'revision' element.
        """
        # For the XML format, we need to write them as Unicode rather than as
        # utf-8 strings. So that cElementTree can handle properly escaping
        # them.
        decode_utf8 = cache_utf8.decode
        revision_id = rev.revision_id
        if isinstance(revision_id, str):
            revision_id = decode_utf8(revision_id)
        root = Element('revision',
                       committer = rev.committer,
                       timestamp = '%.3f' % rev.timestamp,
                       revision_id = revision_id,
                       inventory_sha1 = rev.inventory_sha1,
                       format='5',
                       )
        if rev.timezone is not None:
            root.set('timezone', str(rev.timezone))
        root.text = '\n'
        msg = SubElement(root, 'message')
        msg.text = rev.message
        msg.tail = '\n'
        if rev.parent_ids:
            pelts = SubElement(root, 'parents')
            pelts.tail = pelts.text = '\n'
            for parent_id in rev.parent_ids:
                assert isinstance(parent_id, basestring)
                p = SubElement(pelts, 'revision_ref')
                p.tail = '\n'
                if isinstance(parent_id, str):
                    parent_id = decode_utf8(parent_id)
                p.set('revision_id', parent_id)
        if rev.properties:
            self._pack_revision_properties(rev, root)
        return root

    def _pack_revision_properties(self, rev, under_element):
        """Add a 'properties' element holding the properties of rev.

        Properties are written in sorted order.

        :param rev: the Revision whose properties are written.
        :param under_element: the element that receives the new child.
        """
        top_elt = SubElement(under_element, 'properties')
        for prop_name, prop_value in sorted(rev.properties.items()):
            assert isinstance(prop_name, basestring)
            assert isinstance(prop_value, basestring)
            prop_elt = SubElement(top_elt, 'property')
            prop_elt.set('name', prop_name)
            prop_elt.text = prop_value
            prop_elt.tail = '\n'
        top_elt.tail = '\n'

    def _unpack_inventory(self, elt):
        """Construct an Inventory from an 'inventory' XML element.

        :param elt: the parsed 'inventory' element.
        :return: the Inventory it describes.
        :raises BzrError: if a format attribute is present and is not '5'.
        """
        assert elt.tag == 'inventory'
        root_id = elt.get('file_id') or ROOT_ID
        format_version = elt.get('format')
        if format_version is not None and format_version != '5':
            raise BzrError("invalid format version %r on inventory"
                           % format_version)
        revision_id = elt.get('revision_id')
        if revision_id is not None:
            revision_id = cache_utf8.encode(revision_id)
        inv = Inventory(root_id, revision_id=revision_id)
        for e in elt:
            ie = self._unpack_entry(e)
            # None is the signal for "no parent_id attribute": such entries
            # live directly under the root.
            if ie.parent_id is None:
                ie.parent_id = root_id
            inv.add(ie)
        return inv

    def _unpack_entry(self, elt):
        """Construct an inventory entry from one child of 'inventory'.

        :param elt: a 'directory', 'file' or 'symlink' element.
        :return: the matching inventory entry; parent_id is None when the
            element has no parent_id attribute.
        :raises AssertionError: if the tag is not a versionable kind.
        :raises BzrError: if the kind is versionable but not handled here.
        """
        kind = elt.tag
        if not InventoryEntry.versionable_kind(kind):
            raise AssertionError('unsupported entry kind %s' % kind)

        get_cached = _get_utf8_or_ascii

        parent_id = elt.get('parent_id')
        # TODO: jam 20060817 At present, caching file ids costs us too
        #       much time. It slows down overall read performances from
        #       approx 500ms to 700ms. And doesn't improve future reads.
        #       it might be because revision ids and file ids are mixing.
        #       Consider caching *just* the file ids, for a limited period
        #       of time.
        #parent_id = get_cached(parent_id)
        #file_id = get_cached(elt.get('file_id'))
        file_id = elt.get('file_id')

        if kind == 'directory':
            ie = inventory.InventoryDirectory(file_id,
                                              elt.get('name'),
                                              parent_id)
        elif kind == 'file':
            ie = inventory.InventoryFile(file_id,
                                         elt.get('name'),
                                         parent_id)
            ie.text_sha1 = elt.get('text_sha1')
            if elt.get('executable') == 'yes':
                ie.executable = True
            v = elt.get('text_size')
            # A missing attribute stays None; anything else becomes an int.
            ie.text_size = v and int(v)
        elif kind == 'symlink':
            ie = inventory.InventoryLink(file_id,
                                         elt.get('name'),
                                         parent_id)
            ie.symlink_target = elt.get('symlink_target')
        else:
            raise BzrError("unknown kind %r" % kind)
        revision = elt.get('revision')
        if revision is not None:
            revision = get_cached(revision)
        ie.revision = revision

        return ie

    def _unpack_revision(self, elt):
        """XML Element -> Revision object

        :param elt: the parsed 'revision' element.
        :return: the Revision it describes; timezone defaults to 0 when the
            attribute is absent.
        :raises BzrError: if a format attribute is present and is not '5'.
        """
        assert elt.tag == 'revision'
        format_version = elt.get('format')
        if format_version is not None and format_version != '5':
            raise BzrError("invalid format version %r on revision"
                           % format_version)
        get_cached = _get_utf8_or_ascii
        rev = Revision(committer = elt.get('committer'),
                       timestamp = float(elt.get('timestamp')),
                       revision_id = get_cached(elt.get('revision_id')),
                       inventory_sha1 = elt.get('inventory_sha1')
                       )
        # Compare with None explicitly: the truth value of an element
        # reflects its number of children, not whether it was found.
        parents = elt.find('parents')
        if parents is not None:
            for p in parents:
                assert p.tag == 'revision_ref', \
                    "bad parent node tag %r" % p.tag
                rev.parent_ids.append(get_cached(p.get('revision_id')))
        self._unpack_revision_properties(elt, rev)
        v = elt.get('timezone')
        if v is None:
            rev.timezone = 0
        else:
            rev.timezone = int(v)
        rev.message = elt.findtext('message') # text of <message>
        return rev

    def _unpack_revision_properties(self, elt, rev):
        """Unpack properties onto a revision.

        :param elt: the parsed 'revision' element.
        :param rev: the Revision to fill in; it must have no properties yet.
        """
        props_elt = elt.find('properties')
        assert len(rev.properties) == 0
        # Compare with None explicitly: the truth value of an element
        # reflects its number of children, not whether it was found.
        if props_elt is None:
            return
        for prop_elt in props_elt:
            assert prop_elt.tag == 'property', \
                "bad tag under properties list: %r" % prop_elt.tag
            name = prop_elt.get('name')
            value = prop_elt.text
            # If a property had an empty value ('') cElementTree reads
            # that back as None, convert it back to '', so that all
            # properties have string values
            if value is None:
                value = ''
            assert name not in rev.properties, \
                "repeated property %r" % name
            rev.properties[name] = value

386

387

388

# Module-level instance of the version 5 serializer.
serializer_v5 = Serializer_v5()

Older »