Rev 4444: (Jelmer) Move squashing of XML-invalid characters to XMLSerializer. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Jun 15 21:11:36 BST 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4444
revision-id: pqm at pqm.ubuntu.com-20090615201132-p32zthmt3rjn2tfk
parent: pqm at pqm.ubuntu.com-20090615170647-5zu6h93br8c4ue6i
parent: jelmer at samba.org-20090615191156-s7nuxxxfewe11pwj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-06-15 21:11:32 +0100
message:
(Jelmer) Move squashing of XML-invalid characters to XMLSerializer.
modified:
bzrlib/chk_serializer.py chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/serializer.py serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/xml4.py xml4.py-20050916091259-db5ab55e7e6ca324
bzrlib/xml8.py xml5.py-20050907032657-aac8f960815b66b1
bzrlib/xml_serializer.py xml.py-20050309040759-57d51586fdec365d
------------------------------------------------------------
revno: 4443.1.2
revision-id: jelmer at samba.org-20090615191156-s7nuxxxfewe11pwj
parent: jelmer at samba.org-20090615190438-3yy3e0gviyej2rtv
committer: Jelmer Vernooij <jelmer at samba.org>
branch nick: bzr.dev
timestamp: Mon 2009-06-15 21:11:56 +0200
message:
Fix comment about XML escaping in tests.
modified:
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
------------------------------------------------------------
revno: 4443.1.1
revision-id: jelmer at samba.org-20090615190438-3yy3e0gviyej2rtv
parent: pqm at pqm.ubuntu.com-20090615170647-5zu6h93br8c4ue6i
parent: jelmer at samba.org-20090609005951-apv900cdk35o2ygh
committer: Jelmer Vernooij <jelmer at samba.org>
branch nick: bzr.dev
timestamp: Mon 2009-06-15 21:04:38 +0200
message:
Merge in patch to squash XML invalid characters in serializer.
modified:
bzrlib/chk_serializer.py chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/serializer.py serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/xml4.py xml4.py-20050916091259-db5ab55e7e6ca324
bzrlib/xml8.py xml5.py-20050907032657-aac8f960815b66b1
bzrlib/xml_serializer.py xml.py-20050309040759-57d51586fdec365d
------------------------------------------------------------
revno: 4416.5.1
revision-id: jelmer at samba.org-20090609005951-apv900cdk35o2ygh
parent: pqm at pqm.ubuntu.com-20090606005240-2qms4n1eqdshzp0w
committer: Jelmer Vernooij <jelmer at samba.org>
branch nick: bzr.dev
timestamp: Tue 2009-06-09 02:59:51 +0200
message:
Move squashing of XML-invalid characters to XMLSerializer.
modified:
bzrlib/chk_serializer.py chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/serializer.py serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/xml4.py xml4.py-20050916091259-db5ab55e7e6ca324
bzrlib/xml8.py xml5.py-20050907032657-aac8f960815b66b1
bzrlib/xml_serializer.py xml.py-20050309040759-57d51586fdec365d
=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py 2009-06-10 03:56:49 +0000
+++ b/bzrlib/chk_serializer.py 2009-06-15 19:04:38 +0000
@@ -50,6 +50,8 @@
"""Simple revision serializer based around bencode.
"""
+ squashes_xml_invalid_characters = False
+
# Maps {key:(Revision attribute, bencode_type, validator)}
# This tells us what kind we expect bdecode to create, what variable on
# Revision we should be using, and a function to call to validate/transform
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2009-06-10 03:56:49 +0000
+++ b/bzrlib/commit.py 2009-06-15 19:04:38 +0000
@@ -108,9 +108,6 @@
def deleted(self, path):
pass
- def escaped(self, escape_count, message):
- pass
-
def missing(self, path):
pass
@@ -153,9 +150,6 @@
def deleted(self, path):
self._note('deleted %s', path)
- def escaped(self, escape_count, message):
- self._note("replaced %d control characters in message", escape_count)
-
def missing(self, path):
self._note('missing %s', path)
@@ -373,7 +367,6 @@
# Prompt the user for a commit message if none provided
message = message_callback(self)
self.message = message
- self._escape_commit_message()
# Add revision data to the local branch
self.rev_id = self.builder.commit(self.message)
@@ -602,17 +595,6 @@
if self.master_locked:
self.master_branch.unlock()
- def _escape_commit_message(self):
- """Replace xml-incompatible control characters."""
- # FIXME: RBC 20060419 this should be done by the revision
- # serialiser not by commit. Then we can also add an unescaper
- # in the deserializer and start roundtripping revision messages
- # precisely. See repository_implementations/test_repository.py
- self.message, escape_count = xml_serializer.escape_invalid_chars(
- self.message)
- if escape_count:
- self.reporter.escaped(escape_count, self.message)
-
def _gather_parents(self):
"""Record the parents of a merge for merge detection."""
# TODO: Make sure that this list doesn't contain duplicate
=== modified file 'bzrlib/serializer.py'
--- a/bzrlib/serializer.py 2009-06-10 03:56:49 +0000
+++ b/bzrlib/serializer.py 2009-06-15 19:04:38 +0000
@@ -24,6 +24,8 @@
class Serializer(object):
"""Inventory and revision serialization/deserialization."""
+ squashes_xml_invalid_characters = False
+
def write_inventory(self, inv, f):
"""Write inventory to a file"""
raise NotImplementedError(self.write_inventory)
=== modified file 'bzrlib/tests/per_repository/test_repository.py'
--- a/bzrlib/tests/per_repository/test_repository.py 2009-05-11 07:18:30 +0000
+++ b/bzrlib/tests/per_repository/test_repository.py 2009-06-15 19:11:56 +0000
@@ -28,6 +28,7 @@
osutils,
remote,
repository,
+ xml_serializer,
)
from bzrlib.branch import BzrBranchFormat6
from bzrlib.delta import TreeDelta
@@ -527,16 +528,16 @@
tree = self.make_branch_and_tree('.')
tree.commit(message, rev_id='a', allow_pointless=True)
rev = tree.branch.repository.get_revision('a')
- # we have to manually escape this as we dont try to
- # roundtrip xml invalid characters at this point.
- # when escaping is moved to the serialiser, this test
- # can check against the literal message rather than
- # this escaped version.
- escaped_message, escape_count = re.subn(
- u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
- lambda match: match.group(0).encode('unicode_escape'),
- message)
- self.assertEqual(rev.message, escaped_message)
+ if tree.branch.repository._serializer.squashes_xml_invalid_characters:
+ # we have to manually escape this as we dont try to
+ # roundtrip xml invalid characters in the xml-based serializers.
+ escaped_message, escape_count = re.subn(
+ u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
+ lambda match: match.group(0).encode('unicode_escape'),
+ message)
+ self.assertEqual(rev.message, escaped_message)
+ else:
+ self.assertEqual(rev.message, message)
# insist the class is unicode no matter what came in for
# consistency.
self.assertIsInstance(rev.message, unicode)
=== modified file 'bzrlib/xml4.py'
--- a/bzrlib/xml4.py 2009-04-02 15:28:11 +0000
+++ b/bzrlib/xml4.py 2009-06-09 00:59:51 +0000
@@ -19,6 +19,7 @@
ElementTree,
SubElement,
XMLSerializer,
+ escape_invalid_chars,
)
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
import bzrlib.inventory as inventory
@@ -125,7 +126,7 @@
root.text = '\n'
msg = SubElement(root, 'message')
- msg.text = rev.message
+ msg.text = escape_invalid_chars(rev.message)[0]
msg.tail = '\n'
if rev.parents:
=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py 2009-04-08 20:21:06 +0000
+++ b/bzrlib/xml8.py 2009-06-09 00:59:51 +0000
@@ -28,6 +28,7 @@
Element,
SubElement,
XMLSerializer,
+ escape_invalid_chars,
)
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
from bzrlib.revision import Revision
@@ -345,7 +346,7 @@
root.set('timezone', str(rev.timezone))
root.text = '\n'
msg = SubElement(root, 'message')
- msg.text = rev.message
+ msg.text = escape_invalid_chars(rev.message)[0]
msg.tail = '\n'
if rev.parent_ids:
pelts = SubElement(root, 'parents')
=== modified file 'bzrlib/xml_serializer.py'
--- a/bzrlib/xml_serializer.py 2009-05-12 23:05:21 +0000
+++ b/bzrlib/xml_serializer.py 2009-06-09 00:59:51 +0000
@@ -52,6 +52,8 @@
class XMLSerializer(Serializer):
"""Abstract XML object serialize/deserialize"""
+ squashes_xml_invalid_characters = True
+
def read_inventory_from_string(self, xml_string, revision_id=None,
entry_cache=None):
"""Read xml_string into an inventory object.
@@ -174,6 +176,8 @@
:param message: Commit message to escape
:return: tuple with escaped message and number of characters escaped
"""
+ if message is None:
+ return None, 0
# Python strings can include characters that can't be
# represented in well-formed XML; escape characters that
# aren't listed in the XML specification
More information about the bazaar-commits mailing list